library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.4
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Background

This file is for analysis of several coronavirus data sources focused on the US in 2020. Data include areas such as cases, hospitalizations, and deaths attributed to coronavirus, as well as all-cause deaths.

The previous version of this file includes many exploratory analysis components. This version keeps only the following portions of the previous code:

  1. Functions
  2. Run key analyses
  3. Save and load key files

COVID Tracking Project

The first section is for analysis of data from The COVID Tracking Project. This file contains data on positive tests, hospitalizations, deaths, and the like for coronavirus cases in the US. Downloaded data are unique by state and date.

Key Functions (COVID Tracking Project, USA Facts, CDC All-Cause Deaths)

Functions for working with data from the COVID Tracking Project, USA Facts, and the CDC All-Cause Death data are available in a separate file which is sourced below:

source("./Coronavirus_Statistics_Functions_v002.R")

Running Key Analyses (COVID Tracking Project)

The data from COVID Tracking Project can be loaded and analyzed using the functions above and a variable mapping file:

# STEP 0: Variable mapping - a named character vector whose names are the
# variable names and whose values are the descriptive labels for each variable
varMapper <- c(
  # Case and death counts
  cases = "Cases",
  newCases = "Increase in cases, most recent 30 days",
  casesroll7 = "Rolling 7-day mean cases",
  deaths = "Deaths",
  newDeaths = "Increase in deaths, most recent 30 days",
  deathsroll7 = "Rolling 7-day mean deaths",
  # Per-million versions of cases and deaths
  cpm = "Cases per million",
  cpm7 = "Cases per day (7-day rolling mean) per million",
  newcpm = "Increase in cases, most recent 30 days, per million",
  dpm = "Deaths per million",
  dpm7 = "Deaths per day (7-day rolling mean) per million",
  newdpm = "Increase in deaths, most recent 30 days, per million",
  # Per-million hospitalizations and tests
  hpm7 = "Currently Hospitalized per million (7-day rolling mean)",
  tpm = "Tests per million",
  tpm7 = "Tests per million per day (7-day rolling mean)"
)


# Hierarchical clustering run on the file downloaded Aug 20, 2020
# reAssignState maps Vermont (VT) to New Hampshire (NH)
test_hier5 <- readRunCOVIDTrackingProject(
  thruLabel = "Aug 20, 2020",
  readFrom = "./RInputFiles/Coronavirus/CV_downloaded_200820.csv",
  hierarchical = TRUE,
  kCut = 6,
  reAssignState = list(VT = "NH"),
  minShape = 3,
  ratioDeathvsCase = 5,
  ratioTotalvsShape = 0.5,
  minDeath = 100,
  minCase = 10000
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          5546056        166127               6486294

## Observations: 9,449
## Variables: 53
## $ date                        <date> 2020-08-20, 2020-08-20, 2020-08-20, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 5332, 112449, 54765, 0, 196280, 644751,...
## $ negative                    <dbl> 307315, 784330, 593744, 1514, 922163, 9...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ hospitalizedCurrently       <dbl> 51, 1105, 499, NA, 1070, 6212, 238, 47,...
## $ hospitalizedCumulative      <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ inIcuCurrently              <dbl> NA, NA, NA, NA, 388, 1707, NA, NA, 26, ...
## $ inIcuCumulative             <dbl> NA, 1348, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 6, NA, 108, NA, 233, NA, NA, NA, 12, NA...
## $ onVentilatorCumulative      <dbl> NA, 734, 488, NA, NA, NA, NA, NA, NA, N...
## $ recovered                   <dbl> 1513, 44684, 48458, NA, 28471, NA, 5759...
## $ dataQualityGrade            <chr> "A", "B", "A", "C", "A+", "B", "A", "B"...
## $ lastUpdateEt                <chr> "8/20/2020 0:00", "8/20/2020 11:00", "8...
## $ dateModified                <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ checkTimeEt                 <chr> "8/19/2020 20:00", "8/20/2020 7:00", "8...
## $ death                       <dbl> 29, 1974, 641, 0, 4684, 11686, 1800, 44...
## $ hospitalized                <dbl> NA, 13330, 3790, NA, 21143, NA, 6784, 1...
## $ dateChecked                 <dttm> 2020-08-20 00:00:00, 2020-08-20 11:00:...
## $ totalTestsViral             <dbl> 312647, 891813, 648509, NA, 1116897, 10...
## $ positiveTestsViral          <dbl> 4970, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 307360, NA, 593744, NA, NA, NA, NA, NA,...
## $ positiveCasesViral          <dbl> 5332, 107483, 54765, 0, 194734, 644751,...
## $ deathConfirmed              <dbl> 29, 1905, NA, NA, 4429, NA, NA, 3572, N...
## $ deathProbable               <dbl> NA, 69, NA, NA, 255, NA, NA, 886, NA, 6...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 895207, NA, NA,...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, 1514, NA, NA, 645170, NA, 2...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 255456, NA, 150931, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 10406, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 140525, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsAntigen           <dbl> NA, NA, 10358, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ fips                        <dbl> 2, 1, 5, 60, 4, 6, 8, 9, 11, 10, 12, 13...
## $ positiveIncrease            <dbl> 85, 971, 549, 0, 723, 5920, 270, 118, 5...
## $ negativeIncrease            <dbl> 1713, 10462, 6680, 0, 6481, 81363, 4657...
## $ total                       <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResults            <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ totalTestResultsIncrease    <dbl> 1798, 11433, 7229, 0, 7204, 87283, 7348...
## $ posNeg                      <dbl> 312647, 896779, 648509, 1514, 1118443, ...
## $ deathIncrease               <dbl> 0, 30, 10, 0, 50, 163, 12, 1, 1, 0, 119...
## $ hospitalizedIncrease        <dbl> 0, 250, 47, 0, 123, 0, 3, 72, 0, 0, 450...
## $ hash                        <chr> "c83a1d575a597788adccbe170950b8d197754b...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp    tests     n
##   <lgl>        <dbl>  <dbl> <dbl>    <dbl> <dbl>
## 1 FALSE        29761    385    NA   392743   790
## 2 TRUE       5516295 165742    NA 69638073  8659
## Observations: 8,659
## Variables: 6
## $ date   <date> 2020-08-20, 2020-08-20, 2020-08-20, 2020-08-20, 2020-08-20,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 85, 971, 549, 723, 5920, 270, 118, 55, 75, 4555, 2759, 235, ...
## $ deaths <dbl> 0, 30, 10, 50, 163, 12, 1, 1, 0, 119, 55, 3, 8, 9, 27, 11, 0...
## $ hosp   <dbl> 51, 1105, 499, 1070, 6212, 238, 47, 78, 40, 5067, 2506, 187,...
## $ tests  <dbl> 1798, 11433, 7229, 7204, 87283, 7348, 12420, 2326, 2087, 295...
## Observations: 8,659
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-26,...
## $ state  <chr> "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 1, 0, 3, 1, 1, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 1, 1, 0, 3, 1, 1, 0, 0, ...
## $ cpm    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, 0.00000000, 0.01992331, 0.01992331, 0.01992331, ...
## $ dpm7   <dbl> NA, NA, NA, 0.00000000, 0.00000000, 0.00000000, 0.00000000, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, 0.00000000, 0.01992331, 0.01992331, 0.01992331, ...

## 
## Recency is defined as 2020-07-22 through current
## 
## Recency is defined as 2020-07-22 through current

# k-means clustering run (hierarchical = FALSE) on the per capita data already
# stored in test_hier5; no readFrom is supplied and cumulative plots are off
# NOTE(review): seed is fixed, presumably so the k-means result is reproducible
test_km5 <- readRunCOVIDTrackingProject(
  thruLabel = "Aug 20, 2020",
  dfPerCapita = test_hier5$dfPerCapita,
  hierarchical = FALSE,
  makeCumulativePlots = FALSE,
  minShape = 3,
  ratioDeathvsCase = 5,
  ratioTotalvsShape = 0.5,
  minDeath = 100,
  minCase = 10000,
  nCenters = 5,
  testCenters = 1:10,
  iter.max = 20,
  nstart = 10,
  seed = 2008261400
)

## 
## Cluster means and counts
##                 1     2    3     4    5
## .           10.00 14.00 9.00 14.00 4.00
## totalCases   0.28  0.86 0.71  0.50 0.78
## totalDeaths  0.41  1.42 2.73  0.99 5.32
## cases_3      0.03  0.01 0.05  0.02 0.10
## deaths_3     0.27  0.08 0.13  0.06 0.15
## cases_4      0.07  0.06 0.26  0.12 0.50
## deaths_4     1.27  0.63 1.73  1.00 2.58
## cases_5      0.06  0.07 0.24  0.19 0.23
## deaths_5     0.69  0.66 1.77  1.50 1.59
## cases_6      0.08  0.18 0.11  0.15 0.07
## deaths_6     0.42  0.61 0.70  1.01 0.44
## cases_7      0.23  0.44 0.21  0.28 0.06
## deaths_7     0.70  1.60 0.42  0.81 0.18
## cases_8      0.16  0.22 0.14  0.19 0.04
## deaths_8     0.70  1.38 0.25  0.61 0.06

## 
## Recency is defined as 2020-07-22 through current
## 
## Recency is defined as 2020-07-22 through current

# Run in a session that has access to the old data files
# identical(test_hier5$useClusters, clustVec)  # TRUE
# identical(test_km5$useClusters, testCluster_km5$objCluster$cluster)  # TRUE

Cluster files produced using the new functions and the existing August 20, 2020 data are identical to ‘clustVec’ and ‘testCluster_km5’ produced in the _v001 code.

Further, new data is downloaded that is current through September 30, 2020, and the test_hier5 clusters are used for analysis:

# Run the newer download (labelled through Sep 30, 2020) using the clusters
# already stored in test_hier5, with the earlier raw data passed as compareFile
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201001.csv"
test_hier5_201001 <- readRunCOVIDTrackingProject(
  thruLabel = "Sep 30, 2020",
  readFrom = locDownload,
  compareFile = test_hier5$dfRaw,
  useClusters = test_hier5$useClusters
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7198509        198929               7830770
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: totalTestResultsSource
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-09-30 2020-09-29 2020-09-28 2020-09-27 2020-09-26 2020-09-25 2020-09-24 2020-09-23 2020-09-22 2020-09-21 2020-09-20 2020-09-19 2020-09-18 2020-09-17 2020-09-16 2020-09-15 2020-09-14 2020-09-13 2020-09-12 2020-09-11 2020-09-10 2020-09-09 2020-09-08 2020-09-07 2020-09-06 2020-09-05 2020-09-04 2020-09-03 2020-09-02 2020-09-01 2020-08-31 2020-08-30 2020-08-29 2020-08-28 2020-08-27 2020-08-26 2020-08-25 2020-08-24 2020-08-23 2020-08-22 2020-08-21
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
##          date                  name newValue oldValue
## 1  2020-02-15      positiveIncrease        0        7
## 2  2020-02-16      positiveIncrease        0        7
## 3  2020-02-17      positiveIncrease        0       15
## 4  2020-02-18      positiveIncrease        0        9
## 5  2020-02-19      positiveIncrease        0       10
## 6  2020-02-20      positiveIncrease        0       13
## 7  2020-02-21      positiveIncrease        0       11
## 8  2020-02-22      positiveIncrease        0       13
## 9  2020-02-23      positiveIncrease        0       16
## 10 2020-02-24      positiveIncrease        0       26
## 11 2020-02-25      positiveIncrease        0       31
## 12 2020-02-26      positiveIncrease        0       29
## 13 2020-02-27      positiveIncrease        0       27
## 14 2020-02-28      positiveIncrease        0       40
## 15 2020-02-29      positiveIncrease       18       24
## 16 2020-03-01      positiveIncrease       16       78
## 17 2020-03-02      positiveIncrease       44       82
## 18 2020-03-03      positiveIncrease       48      100
## 19 2020-03-04      positiveIncrease       62      110
## 20 2020-03-05      positiveIncrease      103      151
## 21 2020-03-06      positiveIncrease      108      137
## 22 2020-03-07      positiveIncrease      175      217
## 23 2020-03-08      positiveIncrease      198      267
## 24 2020-03-09      positiveIncrease      292      367
## 25 2020-03-10      positiveIncrease      387      441
## 26 2020-03-11      positiveIncrease      509      527
## 27 2020-03-13      positiveIncrease     1072     1025
## 28 2020-03-15      positiveIncrease     1291     1251
## 29 2020-03-16      positiveIncrease     1739     1560
## 30 2020-03-17      positiveIncrease     2588     3613
## 31 2020-03-18      positiveIncrease     3089     3171
## 32 2020-03-20      positiveIncrease     6147     6255
## 33 2020-03-21      positiveIncrease     6793     6885
## 34 2020-03-22      positiveIncrease     9125     9259
## 35 2020-03-24      positiveIncrease    10769    10632
## 36 2020-04-03 hospitalizedCurrently    25729    25472
## 37 2020-04-29      positiveIncrease    26168    26641
## 38 2020-04-30      positiveIncrease    29975    29568
## 39 2020-05-08      positiveIncrease    27222    27605
## 40 2020-05-09      positiveIncrease    25218    24810
## 41 2020-05-10      positiveIncrease    21055    21504
## 42 2020-05-12      positiveIncrease    22890    22663
## 43 2020-05-26      positiveIncrease    16825    16629
## 44 2020-06-08      positiveIncrease    17209    17012
## 45 2020-06-19      positiveIncrease    31471    31046
## 46 2020-06-21      positiveIncrease    27928    27284
## 47 2020-06-23      positiveIncrease    33447    33021
## 48 2020-06-24      positiveIncrease    39075    38684
## 49 2020-06-25      positiveIncrease    39637    39072
## 50 2020-06-27      positiveIncrease    43164    43783
## 51 2020-06-29      positiveIncrease    39813    39175
## 52 2020-07-26      positiveIncrease    61000    61713
## 53 2020-07-28      positiveIncrease    59003    56229
## 54 2020-07-29      positiveIncrease    64408    66969
## 55 2020-08-02      positiveIncrease    46812    48266
## 56 2020-08-09      positiveIncrease    50627    51365
## 57 2020-08-16      positiveIncrease    42487    43083
## 58 2020-08-20      positiveIncrease    43758    43245
## 59 2020-08-20         deathIncrease     1134     1117
## Joining, by = c("date", "name")
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state                  name newValue oldValue
## 1    WA      positiveIncrease    70973    68687
## 2    WI hospitalizedCurrently    45951    51401
## Observations: 11,802
## Variables: 54
## $ date                        <date> 2020-09-30, 2020-09-30, 2020-09-30, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 8780, 154701, 83697, 0, 218507, 810625,...
## $ negative                    <dbl> 448427, 994475, 949107, 1571, 1243641, ...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 457207, 1132039, 1029717, 1571, 1457575...
## $ hospitalizedCurrently       <dbl> 53, 776, 484, NA, 560, 3267, 268, 104, ...
## $ hospitalizedCumulative      <dbl> NA, 17257, 5354, NA, 22119, NA, 7558, 1...
## $ inIcuCurrently              <dbl> NA, NA, 218, NA, 115, 830, NA, NA, 28, ...
## $ inIcuCumulative             <dbl> NA, 1815, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 7, NA, 93, NA, 55, NA, NA, NA, 9, NA, N...
## $ onVentilatorCumulative      <dbl> NA, 1025, 676, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 4555, 67948, 75312, NA, 35261, NA, 6500...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "9/30/2020 03:59", "9/30/2020 11:00", "...
## $ dateModified                <dttm> 2020-09-30 03:59:00, 2020-09-30 11:00:...
## $ checkTimeEt                 <chr> "09/29 23:59", "09/30 07:00", "09/29 20...
## $ death                       <dbl> 56, 2540, 1369, 0, 5650, 15792, 1952, 4...
## $ hospitalized                <dbl> NA, 17257, 5354, NA, 22119, NA, 7558, 1...
## $ dateChecked                 <dttm> 2020-09-30 03:59:00, 2020-09-30 11:00:...
## $ totalTestsViral             <dbl> 457207, 1132039, 1029717, 1571, NA, 147...
## $ positiveTestsViral          <dbl> 7807, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 449109, NA, 949107, NA, NA, NA, NA, NA,...
## $ positiveCasesViral          <dbl> 8780, 137564, 80610, 0, 213934, 810625,...
## $ deathConfirmed              <dbl> 56, 2399, 1223, NA, 5382, NA, NA, 3610,...
## $ deathProbable               <dbl> NA, 141, 146, NA, 268, NA, NA, 898, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1341180, NA, 38...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1457575, NA, 902242, NA...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 293098, NA, 167989, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 11733, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 156256, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 58393, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 11700, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 3383, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 106, 1147, 942, 0, 323, 3200, 535, 221,...
## $ negativeIncrease            <dbl> 6045, 11312, 21205, 0, 3735, 88457, 674...
## $ total                       <dbl> 457207, 1149176, 1032804, 1571, 1462148...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 6151, 12327, 21812, 0, 4047, 91657, 145...
## $ posNeg                      <dbl> 457207, 1149176, 1032804, 1571, 1462148...
## $ deathIncrease               <dbl> 0, 23, 19, 0, 18, 152, 7, 3, 1, 1, 175,...
## $ hospitalizedIncrease        <dbl> 0, 75, 0, 0, 72, 0, 28, 0, 0, 0, 253, 1...
## $ hash                        <chr> "1716aba4b705ff74814829d25487c6e57d2712...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        52622    732    NA    442114   995
## 2 TRUE       7145887 198197    NA 103496519 10807
## Observations: 10,807
## Variables: 6
## $ date   <date> 2020-09-30, 2020-09-30, 2020-09-30, 2020-09-30, 2020-09-30,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 106, 1147, 942, 323, 3200, 535, 221, 26, 82, 1948, 1720, 87,...
## $ deaths <dbl> 0, 23, 19, 18, 152, 7, 3, 1, 1, 175, 27, 0, 16, 4, 35, 20, 4...
## $ hosp   <dbl> 53, 776, 484, 560, 3267, 268, 104, 95, 72, 2097, 1896, 147, ...
## $ tests  <dbl> 6151, 12327, 21812, 4047, 91657, 14538, 12401, 1464, 1074, 1...
## Observations: 10,807
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

## 
## Recency is defined as 2020-09-01 through current
## 
## Recency is defined as 2020-09-01 through current

Segments appear to be on trend with previous analysis, with what was previously called “late pandemic” having peaked and the segments that had smaller deaths per million showing higher percentage increases.

USA Facts

The second section is for analysis of data from USA Facts. This site contains county-level data for the US focused on coronavirus cases and deaths. Data are provided with one row per county and one column for each date, and are thus unique by county. There are differences in state and US totals for coronavirus cases and deaths when reported by COVID Tracking Project and USA Facts, though trends, timing, and relative magnitudes are generally in agreement between the two sources.

Key Functions (USA Facts)

Functions for reading and analyzing data from USA Facts are sourced above from Coronavirus_Statistics_Functions_v002.R

Running Key Analyses (USA Facts)

The data from USA Facts can be loaded and analyzed using the sourced functions, a variable mapping file, and a main function that calls the other functions and returns a list:

# STEP 1a: Locations of the USA Facts population, case, and death files,
# plus the date string later passed as unassignedDate
popFile <-
  "./RInputFiles/Coronavirus/covid_county_population_usafacts.csv"
caseFile_20200903 <-
  "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200903.csv"
deathFile_20200903 <-
  "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200903.csv"
maxDate_20200903 <- "2020-08-31"

# STEP 1b: Read and pivot the USA Facts population, case, and death files
# The returned object is kept whole; its pop element is extracted separately
rawUSAFacts_20200903 <- readPivotUSAFacts(
  popFile = popFile,
  caseFile = caseFile_20200903,
  deathFile = deathFile_20200903,
  unassignedDate = maxDate_20200903
)
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Joining, by = c("state", "date", "unassigned")

# Keep the population data from the USA Facts read as its own object
pop_usafacts <- rawUSAFacts_20200903[["pop"]]

# STEP 2: Read the case and death files again (redundant with the earlier read),
# combine them, and add population totals
# stateClusters = NULL, so no clusters are supplied for this run
burden_20200903_new <- readUSAFacts(
  caseFile = caseFile_20200903,
  deathFile = deathFile_20200903,
  stateClusters = NULL
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 228
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20`     <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20`     <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20`     <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20`     <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20`     <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20`     <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20`      <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20`      <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20`      <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20`      <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20`      <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20`      <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20`      <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20`      <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20`      <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20`     <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20`     <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20`     <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20`     <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20`     <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20`     <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20`     <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20`     <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20`     <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20`     <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20`     <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20`     <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20`     <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20`     <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20`     <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20`     <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20`     <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20`     <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20`     <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20`     <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20`     <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20`      <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20`      <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20`      <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20`      <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20`      <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20`      <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20`      <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20`      <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20`      <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20`     <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20`     <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20`     <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20`     <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20`     <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20`     <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20`     <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20`     <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20`     <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20`     <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20`     <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20`     <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20`     <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20`     <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20`     <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20`     <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20`     <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20`     <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20`     <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20`     <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20`     <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20`     <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20`      <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20`      <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20`      <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20`      <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20`      <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20`      <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20`      <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20`      <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20`      <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20`     <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20`     <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20`     <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20`     <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20`     <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20`     <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20`     <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20`     <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20`     <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20`     <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20`     <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20`     <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20`     <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20`     <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20`     <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20`     <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20`     <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20`     <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20`     <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20`     <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20`     <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20`      <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20`      <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20`      <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20`      <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20`      <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20`      <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20`      <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20`      <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20`      <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20`     <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20`     <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20`     <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20`     <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20`     <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20`     <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20`     <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20`     <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20`     <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20`     <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20`     <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20`     <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20`     <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20`     <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20`     <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20`     <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20`     <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20`     <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20`     <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20`     <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20`     <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20`     <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20`      <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20`      <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20`      <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20`      <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20`      <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20`      <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20`      <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20`      <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20`      <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20`     <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20`     <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20`     <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20`     <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20`     <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20`     <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20`     <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20`     <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20`     <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20`     <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20`     <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20`     <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20`     <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20`     <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20`     <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20`     <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20`     <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20`     <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20`     <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20`     <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20`     <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20`     <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20`      <dbl> 0, 1453, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 228
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20`     <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20`     <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20`      <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20`     <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20`     <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20`     <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20`     <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20`     <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20`     <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20`     <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20`      <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20`      <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20`      <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20`      <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20`     <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20`     <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20`     <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20`     <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20`     <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20`     <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20`     <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20`     <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20`     <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20`     <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20`     <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20`      <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20`      <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20`      <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20`      <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20`      <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20`     <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20`     <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20`     <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20`     <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20`     <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20`     <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20`     <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20`     <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20`     <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20`     <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20`     <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## Observations: 715,680
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# STEP 3: Explore the cases and deaths by county (can be repeated for other counties)
# This run covers FL, GA, SC, AL and MS; the maxDate_/rawUSAFacts_ inputs are defined
# outside this excerpt
casesDeathsByCounty(
  useDate = maxDate_20200903,
  inclStates = c("FL", "GA", "SC", "AL", "MS"),
  caseData = rawUSAFacts_20200903$cases,
  deathData = rawUSAFacts_20200903$deaths,
  highCaseAmount = 80000,
  highDeathAmount = 2000
)

# STEP 4: Create county-level clusters (k-means, 5 clusters, minimum county population 25k)
# The returned object is reused in later steps via its objCluster, countyFiltered and
# countyDailyPerCapita elements
clust_20200903_new <- prepClusterCounties(
  burdenFile = burden_20200903_new,
  maxDate = maxDate_20200903,
  minPop = 25000,
  hierarchical = FALSE,  # k-means rather than hierarchical clustering
  minShape = 3,
  ratioDeathvsCase = 5,
  ratioTotalvsShape = 0.5,
  minDeath = 100,
  minCase = 5000,
  nCenters = 5,
  testCenters = 1:25,
  iter.max = 20,
  nstart = 10,
  seed = 2009081450  # fixed seed; presumably keeps the cluster assignments reproducible
)
## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0352              0          0.266           0.140
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Cluster means and counts
##                  1      2     3      4      5
## .           501.00 473.00 53.00 199.00 365.00
## totalCases    0.69   0.32  1.23   1.14   0.52
## totalDeaths   1.36   0.36  9.67   4.76   1.86
## cases_3       0.01   0.02  0.08   0.02   0.02
## deaths_3      0.05   0.08  0.12   0.05   0.09
## cases_4       0.04   0.06  0.39   0.14   0.17
## deaths_4      0.35   0.54  2.07   0.85   1.45
## cases_5       0.05   0.08  0.17   0.14   0.17
## deaths_5      0.29   0.34  1.29   0.99   1.72
## cases_6       0.13   0.11  0.10   0.15   0.12
## deaths_6      0.36   0.52  0.62   0.70   0.73
## cases_7       0.39   0.27  0.14   0.32   0.24
## deaths_7      1.25   0.69  0.46   1.09   0.45
## cases_8       0.38   0.32  0.12   0.24   0.25
## deaths_8      2.61   0.58  0.43   1.32   0.51
# STEP 5: Extract the clusters from the clustering object
# Takes the `cluster` element nested two `objCluster` levels deep in the STEP 4 result
# NOTE(review): the doubled `$objCluster` is intentional only if prepClusterCounties() wraps
# its clustering output in a list that itself has an `objCluster` element - confirm there
clustVec_county_20200903_new <- clust_20200903_new$objCluster$objCluster$cluster

# STEP 6: Assess the quality of the new clusters
# The countyFiltered element of the STEP 4 result is passed as both the population
# input (dfPop) and the burden input (dfBurden)
helperACC_county_20200903_new <- helperAssessCountyClusters(
  clustVec_county_20200903_new,
  dfPop = clust_20200903_new$countyFiltered,
  dfBurden = clust_20200903_new$countyFiltered,
  thruLabel = "Sep 3, 2020",
  plotsTogether = TRUE,
  orderCluster = TRUE
)

## 
## Recency is defined as 2020-08-02 through current
## 
## Recency is defined as 2020-08-02 through current

# STEP 7: Create a plot of cumulative burden by cluster
# Population, cases and deaths are summed by cluster and date, accumulated over time
# within each cluster, and expressed per million people before being plotted
helperACC_county_20200903_new %>%
  select(cluster, date, pop, cases, deaths) %>%
  group_by(cluster, date) %>%
  summarize_if(is.numeric, sum, na.rm = TRUE) %>%
  # summarize drops the date grouping, so the cumulative sums below run within each cluster
  arrange(date) %>%
  mutate(
    casesPerMillion = 1000000 * cumsum(cases) / pop,
    deathsPerMillion = 1000000 * cumsum(deaths) / pop
  ) %>%
  ungroup() %>%
  select(cluster, date, cases = casesPerMillion, deaths = deathsPerMillion) %>%
  pivot_longer(cols = -c(cluster, date)) %>%
  ggplot(aes(x = date, y = value, color = cluster)) +
  geom_line(size = 1) +
  # Label each line with its rounded value on the latest date, placed two days to the right
  geom_text(
    data = function(df) filter(df, date == max(date)),
    aes(x = date + lubridate::days(2), label = round(value)),
    hjust = 0,
    size = 3
  ) +
  labs(title = "Cumulative burden per million people by segment", x = "", y = "") +
  facet_wrap(
    ~c("cases" = "Cases per million", "deaths" = "Deaths per million")[name],
    scales = "free_y"
  ) +
  # Expand only the upper end of the date axis to leave room for the text labels
  scale_x_date(date_breaks = "1 months", date_labels = "%b", expand = expand_scale(c(0, 0.1)))

# STEP 8: Add back clusters not used for analysis (code 999) and associated disease data
# Burden data comes from the countyDailyPerCapita element of the STEP 4 result
clusterStateData_20200903_new <- helperMakeClusterStateData(
  helperACC_county_20200903_new,
  dfBurden = clust_20200903_new$countyDailyPerCapita,
  orderCluster = TRUE
)
## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"
# STEP 9: Run an example state-level summary (can expand to other states)
# Example covers MN, ND, SD and WI; the STEP 8 data is first passed through
# changeOrderLabel() with grpVars="fipsCounty"
stateCountySummary(
  states = c("MN", "ND", "SD", "WI"),
  df = changeOrderLabel(clusterStateData_20200903_new, grpVars = "fipsCounty"),
  keyDate = maxDate_20200903,
  showQuadrants = TRUE,
  showCumulative = TRUE,
  facetCumulativeByState = TRUE,
  showAllFactorLevels = TRUE
)

# STEP 10a: Read in updated raw data
# Locations of the case and death files downloaded on 2020-09-17
caseFile_20200917 <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20200917.csv"
deathFile_20200917 <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20200917.csv"

# Key date used for this download (also passed as unassignedDate below)
maxDate_20200917 <- "2020-09-15"

# popFile is defined outside this excerpt
rawUSAFacts_20200917 <- readPivotUSAFacts(
  popFile = popFile,
  caseFile = caseFile_20200917,
  deathFile = deathFile_20200917,
  unassignedDate = maxDate_20200917
)
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Joining, by = c("state", "date", "unassigned")

# STEP 10b: Use existing state-level segments with updated raw data
# NOTE(review): test_hier5_201001 is defined outside this excerpt and its suffix does not
# follow the YYYYMMDD pattern of the neighbouring objects - confirm it is the intended source
burden_20200917 <- readUSAFacts(
  caseFile = caseFile_20200917,
  deathFile = deathFile_20200917,
  stateClusters = test_hier5_201001$useClusters
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 242
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20`     <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20`     <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20`     <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20`     <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20`     <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20`     <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20`      <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20`      <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20`      <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20`      <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20`      <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20`      <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20`      <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20`      <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20`      <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20`     <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20`     <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20`     <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20`     <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20`     <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20`     <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20`     <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20`     <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20`     <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20`     <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20`     <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20`     <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20`     <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20`     <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20`     <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20`     <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20`     <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20`     <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20`     <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20`     <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20`     <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20`      <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20`      <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20`      <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20`      <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20`      <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20`      <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20`      <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20`      <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20`      <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20`     <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20`     <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20`     <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20`     <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20`     <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20`     <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20`     <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20`     <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20`     <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20`     <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20`     <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20`     <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20`     <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20`     <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20`     <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20`     <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20`     <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20`     <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20`     <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20`     <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20`     <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20`     <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20`      <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20`      <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20`      <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20`      <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20`      <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20`      <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20`      <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20`      <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20`      <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20`     <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20`     <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20`     <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20`     <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20`     <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20`     <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20`     <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20`     <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20`     <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20`     <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20`     <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20`     <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20`     <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20`     <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20`     <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20`     <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20`     <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20`     <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20`     <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20`     <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20`     <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20`      <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20`      <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20`      <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20`      <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20`      <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20`      <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20`      <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20`      <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20`      <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20`     <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20`     <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20`     <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20`     <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20`     <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20`     <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20`     <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20`     <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20`     <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20`     <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20`     <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20`     <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20`     <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20`     <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20`     <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20`     <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20`     <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20`     <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20`     <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20`     <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20`     <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20`     <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20`      <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20`      <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20`      <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20`      <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20`      <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20`      <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20`      <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20`      <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20`      <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20`     <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20`     <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20`     <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20`     <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20`     <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20`     <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20`     <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20`     <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20`     <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20`     <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20`     <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20`     <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20`     <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20`     <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20`     <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20`     <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20`     <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20`     <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20`     <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20`     <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20`     <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20`     <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20`      <dbl> 0, 1452, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## $ `9/2/20`      <dbl> 0, 1452, 4583, 768, 562, 1308, 561, 872, 2633, 1045, ...
## $ `9/3/20`      <dbl> 0, 1466, 4628, 771, 564, 1336, 563, 874, 2678, 1046, ...
## $ `9/4/20`      <dbl> 0, 1475, 4654, 776, 570, 1361, 563, 881, 2747, 1054, ...
## $ `9/5/20`      <dbl> 0, 1492, 4686, 776, 576, 1376, 566, 886, 2830, 1059, ...
## $ `9/6/20`      <dbl> 0, 1498, 4713, 777, 581, 1379, 568, 890, 2842, 1061, ...
## $ `9/7/20`      <dbl> 0, 1504, 4730, 778, 583, 1384, 568, 892, 2877, 1063, ...
## $ `9/8/20`      <dbl> 0, 1508, 4757, 778, 589, 1390, 568, 892, 2891, 1064, ...
## $ `9/9/20`      <dbl> 0, 1522, 4787, 778, 591, 1401, 570, 895, 2907, 1068, ...
## $ `9/10/20`     <dbl> 0, 1544, 4833, 785, 594, 1430, 572, 896, 2958, 1076, ...
## $ `9/11/20`     <dbl> 0, 1551, 4886, 786, 602, 1441, 573, 896, 2988, 1088, ...
## $ `9/12/20`     <dbl> 0, 1565, 4922, 792, 604, 1446, 574, 898, 3047, 1094, ...
## $ `9/13/20`     <dbl> 0, 1576, 4959, 794, 607, 1453, 580, 899, 3093, 1094, ...
## $ `9/14/20`     <dbl> 0, 1585, 4978, 801, 610, 1464, 580, 900, 3110, 1097, ...
## $ `9/15/20`     <dbl> 0, 1601, 4992, 806, 611, 1475, 581, 901, 3127, 1102, ...
## Observations: 760,410
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 242
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20`     <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20`     <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20`      <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20`     <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20`     <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20`     <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20`     <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20`     <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20`     <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20`     <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20`      <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20`      <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20`      <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20`      <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20`     <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20`     <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20`     <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20`     <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20`     <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20`     <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20`     <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20`     <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20`     <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20`     <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20`     <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20`      <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20`      <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20`      <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20`      <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20`      <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20`     <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20`     <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20`     <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20`     <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20`     <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20`     <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20`     <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20`     <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20`     <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20`     <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20`     <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## $ `9/2/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/3/20`      <dbl> 0, 24, 44, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/4/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/5/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/6/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/7/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/8/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/9/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/10/20`     <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 13, 16, 12, 14, ...
## $ `9/11/20`     <dbl> 0, 24, 47, 7, 9, 12, 14, 38, 38, 41, 14, 17, 12, 15, ...
## $ `9/12/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/13/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/14/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/15/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## Observations: 760,410
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# STEP 11: Compare burden in old data and new data
# bind_rows(.id="source") tags rows from the first frame as "1" and rows from the second frame as "2";
# these are relabelled with the data vintage, with the newer vintage placed first in the factor levels
bind_rows(burden_20200903_new, burden_20200917, .id="source") %>%
    mutate(source=case_when(source==1 ~ "2020-09-03", source==2 ~ "2020-09-17", TRUE ~ "Unknown"), 
           source=factor(source, levels=c("2020-09-17", "2020-09-03", "Unknown"))
           ) %>%
    group_by(source, date) %>%
    summarize(cumDeaths=sum(cumDeaths), cumCases=sum(cumCases)) %>%
    pivot_longer(c(cumDeaths, cumCases)) %>%
    ggplot(aes(x=date, y=value/1000, group=source, color=source)) + 
    geom_line() + 
    scale_x_date(date_breaks="1 months", date_labels="%m") + 
    facet_wrap(~c("cumCases"="Cases", "cumDeaths"="Deaths")[name], scales="free_y") + 
    labs(y="Burden (000s)", title="US National Coronavirus Burden by Source")

# STEP 12: Show top-level findings by segment
# plotBurdenData() comes from the sourced functions file and is called here on the 2020-09-17 burden data
# NOTE(review): judging by the argument names, maxDate is the last date to include and minPop is a 
# minimum population threshold (10,000 here) - confirm against the function definition
plotBurdenData(burden_20200917, maxDate=maxDate_20200917, minPop=10000)

# STEP 13: Create county-level clusters (k-means, 5 clusters, minimum county population 25k)
# prepClusterCounties() comes from the sourced functions file; the result is a list-like object whose
# countyFiltered element is used by the later cluster-assessment step
# Per the step description, hierarchical=FALSE requests k-means, nCenters=5 the number of clusters, and 
# minPop=25000 the population floor; seed is fixed, presumably so the clustering is reproducible
# NOTE(review): the meaning of minShape, ratioDeathvsCase, ratioTotalvsShape, minDeath, minCase and 
# testCenters cannot be determined from this file; iter.max and nstart look like they are forwarded to
# kmeans() - confirm all of these against the function definition
clust_20200917_new <- prepClusterCounties(burdenFile=burden_20200917, 
                                          maxDate=maxDate_20200917, 
                                          minPop=25000, 
                                          hierarchical=FALSE, 
                                          minShape=3,
                                          ratioDeathvsCase = 5,
                                          ratioTotalvsShape = 0.5,
                                          minDeath=100,
                                          minCase=5000,
                                          nCenters=5,
                                          testCenters=1:25,
                                          iter.max=20,
                                          nstart=10,
                                          seed=2009081450
                                          )
## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0283              0          0.224           0.104
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Cluster means and counts
##                  1      2     3      4      5
## .           503.00 484.00 73.00 219.00 312.00
## totalCases    0.77   0.39  1.40   1.14   0.56
## totalDeaths   1.56   0.46  9.47   4.69   1.97
## cases_3       0.01   0.02  0.06   0.01   0.02
## deaths_3      0.04   0.08  0.09   0.05   0.09
## cases_4       0.04   0.06  0.29   0.12   0.17
## deaths_4      0.32   0.53  1.61   0.76   1.46
## cases_5       0.05   0.07  0.15   0.12   0.16
## deaths_5      0.26   0.35  1.05   0.96   1.72
## cases_6       0.11   0.10  0.10   0.13   0.10
## deaths_6      0.33   0.51  0.58   0.63   0.66
## cases_7       0.34   0.23  0.20   0.29   0.20
## deaths_7      1.05   0.64  0.63   0.96   0.40
## cases_8       0.32   0.26  0.16   0.23   0.21
## deaths_8      2.09   0.52  0.78   1.22   0.42
## cases_9       0.14   0.16  0.05   0.08   0.11
## deaths_9      0.87   0.55  0.26   0.43   0.22
# STEP 14: Assess the previous clusters on the updated data
# The cluster assignments passed as vecCluster are the ones from the 2020-09-03 run, while the population
# and burden data both come from the same filtered county file produced by the 2020-09-17 clustering run
# thruLabel is a hard-coded string, so it needs to be updated by hand when the data vintage changes
# NOTE(review): the plotting/ordering flags are passed through as-is; their exact effect should be 
# confirmed against helperAssessCountyClusters() in the sourced functions file
helper_test_20200917 <- helperAssessCountyClusters(vecCluster=clustVec_county_20200903_new, 
                                                   dfPop=clust_20200917_new$countyFiltered, 
                                                   dfBurden=clust_20200917_new$countyFiltered, 
                                                   thruLabel="Sep 15, 2020", 
                                                   plotsTogether=TRUE, 
                                                   showMap=TRUE, 
                                                   clusterPlotsTogether=TRUE, 
                                                   orderCluster=TRUE
                                                   )

## 
## Recency is defined as 2020-08-17 through current
## 
## Recency is defined as 2020-08-17 through current
## Warning: Removed 1 rows containing missing values (geom_point).

# STEP 15: Create normalized data
# Called without aggBy, so the helper's default aggregation level is used
# NOTE(review): the default appears to be cluster-level (the lag tables printed in the next step are 
# by cluster) - confirm against helperMakeNormData() in the sourced functions file
cNorm_20200917 <- helperMakeNormData(helper_test_20200917)

# STEP 16: Assess lags for early pandemic and late pandemic
# Early-pandemic window: 2020-03-01 through 2020-05-31
# The call prints a "best lags" table with one row per cluster (columns cluster, corr, lag)
lagData_early_20200917 <- helperTestLags(cNorm_20200917, minDate="2020-03-01", maxDate="2020-05-31")

## 
## The best lags are:
## # A tibble: 5 x 3
##   cluster  corr   lag
##   <fct>   <dbl> <int>
## 1 5       0.971     2
## 2 2       0.809     4
## 3 1       0.937     5
## 4 4       0.972     7
## 5 3       0.989     8

# Late-pandemic window: 2020-06-01 through 2020-09-15 (same normalized cluster data as the early window)
lagData_late_20200917 <- helperTestLags(cNorm_20200917, minDate="2020-06-01", maxDate="2020-09-15")

## 
## The best lags are:
## # A tibble: 5 x 3
##   cluster  corr   lag
##   <fct>   <dbl> <int>
## 1 2       0.781     6
## 2 4       0.886    16
## 3 1       0.967    19
## 4 5       0.435    23
## 5 3       0.472    26

# STEP 17: Generate a list of key counties
# For each cluster, keep the top three counties (ties included) by total dpm among counties with 
# population of at least 100k, and build a display label "<cluster> - <county> (<state abbreviation>)"
# NOTE: the column named 'state' in helper_test_20200917 is used here as the county FIPS code; it is 
# left-padded with zeroes to 5 characters so that it matches the fips column of usmap::countypop
keyCounties_20200917 <- helper_test_20200917 %>%
    mutate(state=str_pad(state, width=5, side="left", pad="0")) %>%
    filter(pop >= 100000) %>%
    group_by(state, cluster) %>%
    summarize(dpm=sum(dpm), pop=mean(pop)) %>%
    group_by(cluster) %>%
    top_n(n=3, wt=dpm) %>%
    ungroup() %>%
    arrange(cluster, -dpm) %>%
    inner_join(select(usmap::countypop, -pop_2015), by=c("state"="fips")) %>%
    # Strip only a trailing "County"/"Parish" suffix and always add the opening parenthesis explicitly
    # Replacing the suffix itself with "(" left names that lack the suffix (independent cities, 
    # boroughs, District of Columbia, etc.) with a closing parenthesis but no opening one
    mutate(countyName=paste0(cluster, " - ", 
                             stringr::str_replace(county, "\\s*(County|Parish)$", ""), 
                             " (", 
                             abbr, 
                             ")"
                             )
           ) %>%
    select(-abbr, -county)

# STEP 18: Keep only key counties
# Pad the FIPS code (held in the 'state' column) the same way as in the key-county list, keep only rows
# that match a key county (picking up its display label), then normalize at the county level
cNorm_keyCounties_20200917 <- helper_test_20200917 %>%
    mutate(state=str_pad(state, width=5, side="left", pad="0")) %>%
    inner_join(keyCounties_20200917 %>% select(state, countyName), by="state") %>%
    helperMakeNormData(aggBy=c("countyName", "state", "cluster"))

# STEP 19: Create early and late lags for key counties
# Early-pandemic window (2020-03-01 through 2020-05-31), run on the county-level normalized data
# aggBy matches the aggregation used when the key-county normalized data were created
# The call prints a "best lags" table with one row per key county (countyName, state, cluster, corr, lag)
# NOTE(review): the meaning of maxRatio=0.25 is not visible in this file - confirm against helperTestLags()
lagData_early_keycounties_20200917 <- helperTestLags(cNorm_keyCounties_20200917, 
                                                     minDate="2020-03-01", 
                                                     maxDate="2020-05-31", 
                                                     aggBy=c("countyName", "state", "cluster"), 
                                                     maxRatio=0.25
                                                     )

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName          state cluster  corr   lag
##    <chr>               <chr> <fct>   <dbl> <int>
##  1 1 - Anderson (SC)   45007 1       0.717     0
##  2 1 - Webb (TX)       48479 1       0.642     0
##  3 2 - Douglas (NE)    31055 2       0.517     0
##  4 2 - Pulaski (AR)    05119 2       0.644     0
##  5 1 - Highlands (FL)  12055 1       0.772     1
##  6 5 - Litchfield (CT) 09005 5       0.894     4
##  7 2 - DeKalb (IL)     17037 2       0.654     5
##  8 3 - Bronx (NY)      36005 3       0.962     6
##  9 3 - Kings (NY)      36047 3       0.945     8
## 10 3 - Queens (NY)     36081 3       0.943     8
## 11 4 - Yuma (AZ)       04027 4       0.956    11
## 12 5 - Gloucester (NJ) 34015 5       0.874    12
## 13 4 - Caddo (LA)      22017 4       0.785    13
## 14 4 - Hidalgo (TX)    48215 4       0.632    17
## 15 5 - Barnstable (MA) 25001 5       0.679    21
## Warning: Removed 136 rows containing missing values (geom_path).
## Warning: Removed 136 rows containing missing values (geom_path).

# Late-pandemic window (2020-06-01 through 2020-09-15) for the same key counties; all other arguments
# are the same as in the early-window call
lagData_late_keycounties_20200917 <- helperTestLags(cNorm_keyCounties_20200917, 
                                           minDate="2020-06-01", 
                                           maxDate="2020-09-15", 
                                           aggBy=c("countyName", "state", "cluster"), 
                                           maxRatio=0.25
                                           )

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName          state cluster   corr   lag
##    <chr>               <chr> <fct>    <dbl> <int>
##  1 5 - Litchfield (CT) 09005 5        0.179     6
##  2 1 - Highlands (FL)  12055 1        0.689    13
##  3 4 - Hidalgo (TX)    48215 4        0.758    16
##  4 2 - DeKalb (IL)     17037 2        0.130    19
##  5 2 - Douglas (NE)    31055 2        0.152    19
##  6 4 - Yuma (AZ)       04027 4        0.780    19
##  7 4 - Caddo (LA)      22017 4        0.312    24
##  8 2 - Pulaski (AR)    05119 2        0.436    25
##  9 1 - Anderson (SC)   45007 1        0.838    26
## 10 3 - Bronx (NY)      36005 3        0.755    26
## 11 3 - Kings (NY)      36047 3        0.929    26
## 12 3 - Queens (NY)     36081 3        0.734    26
## 13 5 - Gloucester (NJ) 34015 5       -0.412    29
## 14 1 - Webb (TX)       48479 1        0.738    30
## 15 5 - Barnstable (MA) 25001 5        0.560    30

# STEP 20: Run for only key counties
# Early-pandemic window (2020-03-01 through 2020-05-31)
# The result is not assigned, so the returned tibble is printed after the key-county and best-lag tables
# NOTE(review): nVar="pop" with nKey=12 appears to select the 12 most populous counties (the printed
# key-county table has 12 rows in descending pop order) - confirm against exploreTopCounties()
exploreTopCounties(helper_test_20200917, minDate="2020-03-01", maxDate="2020-05-31", nVar="pop", nKey=12)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 12 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  625. 10039107
##  2 17031 4       4 - Cook (IL)         997.  5150233
##  3 48201 1       1 - Harris (TX)       518.  4713325
##  4 04013 4       4 - Maricopa (AZ)     707.  4485414
##  5 06073 2       2 - San Diego (CA)    222.  3338330
##  6 06059 1       1 - Orange (CA)       348.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1076.  2716940
##  8 48113 1       1 - Dallas (TX)       398.  2635516
##  9 36047 3       3 - Kings (NY)       2854.  2559903
## 10 06065 1       1 - Riverside (CA)    455.  2470546
## 11 32003 1       1 - Clark (NV)        568.  2266715
## 12 36081 3       3 - Queens (NY)      3208.  2253858

## 
## The best lags are:
## # A tibble: 12 x 5
##    countyName           state cluster  corr   lag
##    <chr>                <chr> <fct>   <dbl> <int>
##  1 1 - Clark (NV)       32003 1       0.799     0
##  2 1 - Dallas (TX)      48113 1       0.816     0
##  3 1 - Harris (TX)      48201 1       0.694     0
##  4 1 - Orange (CA)      06059 1       0.837     1
##  5 2 - San Diego (CA)   06073 2       0.861     1
##  6 4 - Cook (IL)        17031 4       0.972     4
##  7 4 - Maricopa (AZ)    04013 4       0.931     4
##  8 5 - Los Angeles (CA) 06037 5       0.874     4
##  9 3 - Kings (NY)       36047 3       0.945     8
## 10 3 - Queens (NY)      36081 3       0.943     8
## 11 1 - Riverside (CA)   06065 1       0.952    12
## 12 4 - Miami-Dade (FL)  12086 4       0.910    12
## Warning: Removed 23 rows containing missing values (geom_path).
## Warning: Removed 23 rows containing missing values (geom_path).

## # A tibble: 1,104 x 9
## # Groups:   countyName, state, cluster [12]
##    countyName         state cluster date         cpm7  dpm7  corr   lag cpmlag
##    <chr>              <chr> <fct>   <date>      <dbl> <dbl> <dbl> <int>  <dbl>
##  1 1 - Clark (NV)     32003 1       2020-03-01 0          0 0.799     0      0
##  2 1 - Dallas (TX)    48113 1       2020-03-01 0          0 0.816     0      0
##  3 1 - Harris (TX)    48201 1       2020-03-01 0          0 0.694     0      0
##  4 1 - Orange (CA)    06059 1       2020-03-01 0.0900     0 0.837     1     NA
##  5 1 - Riverside (CA) 06065 1       2020-03-01 0          0 0.952    12     NA
##  6 2 - San Diego (CA) 06073 2       2020-03-01 0          0 0.861     1     NA
##  7 3 - Kings (NY)     36047 3       2020-03-01 0          0 0.945     8     NA
##  8 3 - Queens (NY)    36081 3       2020-03-01 0          0 0.943     8     NA
##  9 4 - Cook (IL)      17031 4       2020-03-01 0.0555     0 0.972     4     NA
## 10 4 - Maricopa (AZ)  04013 4       2020-03-01 0          0 0.931     4     NA
## # ... with 1,094 more rows
# Late-pandemic window for the same selection of key counties
# NOTE(review): this window starts on 2020-06-15, whereas the late windows used for the lag analyses
# in the earlier steps start on 2020-06-01 - confirm that the difference is intended
exploreTopCounties(helper_test_20200917, minDate="2020-06-15", maxDate="2020-09-15", nVar="pop", nKey=12)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 12 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  625. 10039107
##  2 17031 4       4 - Cook (IL)         997.  5150233
##  3 48201 1       1 - Harris (TX)       518.  4713325
##  4 04013 4       4 - Maricopa (AZ)     707.  4485414
##  5 06073 2       2 - San Diego (CA)    222.  3338330
##  6 06059 1       1 - Orange (CA)       348.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1076.  2716940
##  8 48113 1       1 - Dallas (TX)       398.  2635516
##  9 36047 3       3 - Kings (NY)       2854.  2559903
## 10 06065 1       1 - Riverside (CA)    455.  2470546
## 11 32003 1       1 - Clark (NV)        568.  2266715
## 12 36081 3       3 - Queens (NY)      3208.  2253858

## 
## The best lags are:
## # A tibble: 12 x 5
##    countyName           state cluster   corr   lag
##    <chr>                <chr> <fct>    <dbl> <int>
##  1 2 - San Diego (CA)   06073 2        0.771     5
##  2 3 - Kings (NY)       36047 3        0.773    12
##  3 5 - Los Angeles (CA) 06037 5        0.740    12
##  4 1 - Harris (TX)      48201 1        0.661    13
##  5 1 - Riverside (CA)   06065 1        0.505    16
##  6 4 - Maricopa (AZ)    04013 4        0.934    18
##  7 4 - Miami-Dade (FL)  12086 4        0.469    19
##  8 1 - Clark (NV)       32003 1        0.814    20
##  9 1 - Orange (CA)      06059 1        0.388    21
## 10 1 - Dallas (TX)      48113 1        0.609    30
## 11 3 - Queens (NY)      36081 3        0.768    30
## 12 4 - Cook (IL)        17031 4       -0.526    30

## # A tibble: 1,080 x 9
## # Groups:   countyName, state, cluster [12]
##    countyName         state cluster date        cpm7  dpm7   corr   lag cpmlag
##    <chr>              <chr> <fct>   <date>     <dbl> <dbl>  <dbl> <int>  <dbl>
##  1 1 - Clark (NV)     32003 1       2020-06-15  90.4 0.756  0.814    20     NA
##  2 1 - Dallas (TX)    48113 1       2020-06-15 125.  1.52   0.609    30     NA
##  3 1 - Harris (TX)    48201 1       2020-06-15  79.0 0.940  0.661    13     NA
##  4 1 - Orange (CA)    06059 1       2020-06-15  58.7 2.16   0.388    21     NA
##  5 1 - Riverside (CA) 06065 1       2020-06-15 129.  1.79   0.505    16     NA
##  6 2 - San Diego (CA) 06073 2       2020-06-15  46.8 0.984  0.771     5     NA
##  7 3 - Kings (NY)     36047 3       2020-06-15  50.1 4.41   0.773    12     NA
##  8 3 - Queens (NY)    36081 3       2020-06-15  53.0 3.74   0.768    30     NA
##  9 4 - Cook (IL)      17031 4       2020-06-15  61.1 6.16  -0.526    30     NA
## 10 4 - Maricopa (AZ)  04013 4       2020-06-15 250.  2.10   0.934    18     NA
## # ... with 1,070 more rows

The process is further converted to functional form as follows:

  1. Capability to either start the process from scratch or to leverage existing clusters at the state or county level
  2. Add the capability to compare a newly read file to an existing file
  3. Add the capability to show burden by existing segment after reading in an existing file
  4. Add the capability to prepare the data (create countyFiltered) without running the clustering process
  5. Add the cumulative burden chart to helperAssessCountyClusters

New data were downloaded on 2020-10-03, and the updated code includes:

# STEP 1a: Input locations and date setting for the 2020-10-03 download
# Date string passed below as unassignedDate (STEP 1b) and maxDate (STEP 2)
maxDate_20201003 <- "2020-09-30"

# County population file (no download-date suffix in its name)
popFile <- "./RInputFiles/Coronavirus/covid_county_population_usafacts.csv"

# Case and death files downloaded on 2020-10-03
deathFile_20201003 <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201003.csv"
caseFile_20201003 <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201003.csv"

# STEP 1b: Read and pivot the USA Facts population, case, and death files
# The population component of the result is extracted afterwards as pop_usafacts
rawUSAFacts_20201003 <- readPivotUSAFacts(
  popFile = popFile,
  caseFile = caseFile_20201003,
  deathFile = deathFile_20201003,
  unassignedDate = maxDate_20201003
)
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Joining, by = c("state", "date", "unassigned")

# Extract the 'pop' element of the STEP 1b result (the population data, per the STEP 1b comment)
pop_usafacts <- rawUSAFacts_20201003$pop

# STEP 2: Read the case and death files again (a redundant read), combine them, and add population totals
# Previous clusters are supplied per county (countyClusters) rather than per state (stateClusters is NULL);
# readUSAFacts was updated to support this
burden_20201003 <- readUSAFacts(
  caseFile = caseFile_20201003,
  deathFile = deathFile_20201003,
  oldFile = burden_20200903_new,
  showBurdenMinPop = 10000,
  maxDate = maxDate_20201003,
  stateClusters = NULL,
  countyClusters = clustVec_county_20200903_new
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 258
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 2, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 1, 4, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 4, 4, 0, 0, 1, 0, 1, 2, 10, 1, 1, 0, 0, 1, 1, 0, 1...
## $ `3/26/20`     <dbl> 0, 6, 5, 0, 0, 2, 2, 1, 2, 13, 1, 4, 1, 0, 1, 1, 0, 1...
## $ `3/27/20`     <dbl> 0, 6, 5, 0, 0, 5, 2, 1, 3, 15, 1, 7, 1, 0, 1, 3, 0, 1...
## $ `3/28/20`     <dbl> 0, 6, 10, 0, 0, 5, 2, 1, 3, 17, 1, 7, 1, 0, 2, 4, 0, ...
## $ `3/29/20`     <dbl> 0, 6, 15, 0, 0, 5, 2, 1, 3, 27, 2, 8, 1, 0, 2, 5, 0, ...
## $ `3/30/20`     <dbl> 0, 7, 18, 0, 2, 5, 2, 1, 9, 36, 2, 10, 2, 0, 2, 5, 0,...
## $ `3/31/20`     <dbl> 0, 7, 19, 0, 3, 5, 2, 1, 9, 36, 2, 11, 3, 0, 2, 5, 0,...
## $ `4/1/20`      <dbl> 0, 10, 23, 0, 3, 5, 2, 1, 11, 45, 2, 13, 4, 2, 3, 6, ...
## $ `4/2/20`      <dbl> 0, 10, 25, 0, 4, 6, 2, 1, 12, 67, 4, 14, 4, 2, 7, 6, ...
## $ `4/3/20`      <dbl> 0, 12, 28, 1, 4, 9, 2, 1, 20, 81, 5, 15, 4, 3, 8, 7, ...
## $ `4/4/20`      <dbl> 0, 12, 29, 2, 4, 10, 2, 1, 21, 87, 6, 15, 4, 7, 9, 7,...
## $ `4/5/20`      <dbl> 0, 12, 34, 2, 7, 10, 2, 1, 24, 90, 6, 18, 5, 9, 9, 7,...
## $ `4/6/20`      <dbl> 0, 12, 38, 3, 7, 10, 2, 1, 38, 96, 6, 20, 6, 9, 9, 9,...
## $ `4/7/20`      <dbl> 0, 12, 42, 3, 8, 10, 2, 2, 48, 102, 6, 20, 6, 10, 9, ...
## $ `4/8/20`      <dbl> 0, 12, 49, 3, 9, 10, 3, 3, 52, 140, 7, 22, 6, 10, 11,...
## $ `4/9/20`      <dbl> 0, 17, 59, 7, 11, 11, 4, 3, 54, 161, 7, 25, 6, 13, 11...
## $ `4/10/20`     <dbl> 0, 17, 59, 9, 11, 12, 4, 3, 54, 171, 7, 27, 7, 13, 11...
## $ `4/11/20`     <dbl> 0, 19, 66, 10, 13, 12, 4, 6, 57, 184, 7, 30, 9, 15, 1...
## $ `4/12/20`     <dbl> 0, 19, 71, 10, 16, 13, 4, 7, 60, 200, 9, 30, 10, 19, ...
## $ `4/13/20`     <dbl> 0, 19, 78, 10, 17, 15, 6, 8, 61, 212, 9, 33, 10, 19, ...
## $ `4/14/20`     <dbl> 0, 23, 87, 11, 17, 16, 8, 8, 62, 216, 9, 33, 12, 21, ...
## $ `4/15/20`     <dbl> 0, 25, 98, 13, 19, 17, 8, 11, 62, 227, 10, 37, 13, 22...
## $ `4/16/20`     <dbl> 0, 25, 102, 14, 23, 18, 8, 11, 63, 234, 11, 37, 13, 2...
## $ `4/17/20`     <dbl> 0, 25, 103, 15, 23, 20, 8, 13, 63, 236, 12, 37, 13, 2...
## $ `4/18/20`     <dbl> 0, 25, 109, 18, 26, 20, 9, 13, 66, 240, 12, 39, 14, 2...
## $ `4/19/20`     <dbl> 0, 27, 114, 20, 28, 21, 9, 14, 72, 246, 12, 42, 14, 2...
## $ `4/20/20`     <dbl> 0, 28, 117, 22, 32, 22, 11, 14, 80, 257, 12, 43, 17, ...
## $ `4/21/20`     <dbl> 0, 30, 123, 28, 32, 26, 11, 15, 83, 259, 12, 44, 18, ...
## $ `4/22/20`     <dbl> 0, 32, 132, 29, 33, 29, 11, 17, 85, 270, 12, 46, 21, ...
## $ `4/23/20`     <dbl> 0, 33, 143, 30, 33, 31, 12, 19, 88, 275, 12, 47, 22, ...
## $ `4/24/20`     <dbl> 0, 36, 147, 32, 34, 31, 12, 21, 89, 282, 12, 49, 25, ...
## $ `4/25/20`     <dbl> 0, 37, 154, 33, 35, 31, 12, 28, 90, 284, 12, 49, 27, ...
## $ `4/26/20`     <dbl> 0, 37, 161, 33, 38, 34, 12, 32, 90, 285, 14, 51, 32, ...
## $ `4/27/20`     <dbl> 0, 39, 168, 35, 42, 34, 12, 34, 90, 289, 14, 51, 39, ...
## $ `4/28/20`     <dbl> 0, 40, 171, 37, 42, 34, 12, 45, 92, 290, 15, 52, 39, ...
## $ `4/29/20`     <dbl> 0, 42, 173, 37, 42, 36, 12, 51, 93, 290, 15, 52, 39, ...
## $ `4/30/20`     <dbl> 0, 42, 174, 39, 42, 37, 13, 53, 93, 290, 15, 52, 43, ...
## $ `5/1/20`      <dbl> 0, 42, 175, 42, 42, 39, 14, 65, 93, 290, 15, 52, 49, ...
## $ `5/2/20`      <dbl> 0, 45, 181, 43, 42, 40, 14, 92, 98, 294, 15, 54, 49, ...
## $ `5/3/20`      <dbl> 0, 48, 187, 45, 42, 40, 14, 105, 105, 300, 16, 57, 49...
## $ `5/4/20`      <dbl> 0, 53, 188, 45, 42, 40, 16, 114, 105, 302, 16, 58, 51...
## $ `5/5/20`      <dbl> 0, 53, 189, 47, 43, 40, 18, 120, 114, 304, 17, 60, 54...
## $ `5/6/20`      <dbl> 0, 58, 196, 47, 43, 42, 18, 130, 114, 306, 18, 61, 54...
## $ `5/7/20`      <dbl> 0, 61, 205, 51, 44, 44, 18, 155, 120, 308, 18, 63, 56...
## $ `5/8/20`      <dbl> 0, 67, 208, 53, 44, 44, 21, 162, 123, 311, 21, 63, 59...
## $ `5/9/20`      <dbl> 0, 68, 216, 58, 45, 44, 22, 178, 124, 314, 22, 64, 61...
## $ `5/10/20`     <dbl> 0, 74, 222, 59, 46, 44, 23, 189, 124, 316, 22, 65, 66...
## $ `5/11/20`     <dbl> 0, 84, 224, 61, 46, 45, 26, 196, 125, 319, 24, 67, 67...
## $ `5/12/20`     <dbl> 0, 91, 227, 67, 46, 45, 26, 224, 126, 324, 24, 69, 69...
## $ `5/13/20`     <dbl> 0, 93, 231, 69, 46, 45, 28, 230, 127, 324, 24, 73, 72...
## $ `5/14/20`     <dbl> 0, 103, 243, 74, 46, 45, 28, 249, 128, 326, 25, 74, 7...
## $ `5/15/20`     <dbl> 0, 103, 244, 79, 49, 45, 32, 258, 129, 326, 26, 75, 8...
## $ `5/16/20`     <dbl> 0, 110, 254, 79, 50, 45, 35, 271, 130, 328, 27, 77, 8...
## $ `5/17/20`     <dbl> 0, 110, 254, 81, 50, 46, 35, 272, 130, 328, 27, 77, 8...
## $ `5/18/20`     <dbl> 0, 120, 260, 85, 50, 47, 40, 285, 133, 329, 28, 79, 8...
## $ `5/19/20`     <dbl> 0, 127, 262, 90, 51, 47, 52, 295, 133, 329, 29, 80, 9...
## $ `5/20/20`     <dbl> 0, 136, 270, 96, 52, 47, 64, 312, 136, 330, 30, 83, 1...
## $ `5/21/20`     <dbl> 0, 147, 270, 100, 52, 48, 71, 321, 136, 330, 31, 84, ...
## $ `5/22/20`     <dbl> 0, 149, 271, 104, 55, 49, 89, 329, 137, 330, 33, 85, ...
## $ `5/23/20`     <dbl> 0, 155, 273, 105, 58, 49, 105, 335, 138, 330, 33, 86,...
## $ `5/24/20`     <dbl> 0, 159, 274, 110, 59, 49, 111, 344, 141, 336, 33, 87,...
## $ `5/25/20`     <dbl> 0, 173, 276, 116, 62, 49, 141, 368, 147, 337, 33, 87,...
## $ `5/26/20`     <dbl> 0, 189, 277, 122, 66, 51, 167, 380, 150, 338, 33, 90,...
## $ `5/27/20`     <dbl> 0, 192, 281, 130, 71, 53, 176, 391, 152, 340, 33, 93,...
## $ `5/28/20`     <dbl> 0, 205, 281, 132, 71, 58, 185, 392, 152, 349, 34, 97,...
## $ `5/29/20`     <dbl> 0, 212, 282, 147, 71, 60, 201, 396, 153, 352, 36, 99,...
## $ `5/30/20`     <dbl> 0, 216, 283, 150, 72, 61, 203, 402, 154, 353, 37, 100...
## $ `5/31/20`     <dbl> 0, 220, 288, 164, 75, 62, 209, 410, 157, 355, 37, 100...
## $ `6/1/20`      <dbl> 0, 233, 292, 172, 76, 63, 209, 414, 164, 358, 38, 103...
## $ `6/2/20`      <dbl> 0, 238, 292, 175, 76, 63, 212, 416, 165, 358, 38, 104...
## $ `6/3/20`      <dbl> 0, 239, 292, 177, 76, 63, 215, 419, 165, 359, 38, 105...
## $ `6/4/20`      <dbl> 0, 241, 293, 177, 76, 63, 217, 421, 167, 360, 38, 107...
## $ `6/5/20`      <dbl> 0, 248, 296, 183, 76, 64, 219, 431, 169, 363, 38, 108...
## $ `6/6/20`      <dbl> 0, 259, 304, 190, 77, 70, 225, 442, 174, 373, 40, 108...
## $ `6/7/20`      <dbl> 0, 265, 313, 193, 77, 72, 232, 449, 176, 378, 42, 110...
## $ `6/8/20`      <dbl> 0, 272, 320, 197, 79, 73, 238, 455, 178, 383, 42, 111...
## $ `6/9/20`      <dbl> 0, 282, 325, 199, 85, 75, 243, 464, 180, 391, 42, 117...
## $ `6/10/20`     <dbl> 0, 295, 331, 208, 89, 79, 248, 471, 182, 401, 42, 118...
## $ `6/11/20`     <dbl> 0, 312, 343, 214, 93, 87, 253, 484, 184, 417, 42, 121...
## $ `6/12/20`     <dbl> 0, 323, 353, 221, 97, 95, 258, 499, 188, 427, 46, 122...
## $ `6/13/20`     <dbl> 0, 331, 361, 226, 100, 102, 276, 517, 190, 438, 47, 1...
## $ `6/14/20`     <dbl> 0, 357, 364, 234, 104, 110, 302, 536, 195, 453, 51, 1...
## $ `6/15/20`     <dbl> 0, 368, 383, 238, 111, 116, 307, 544, 204, 475, 53, 1...
## $ `6/16/20`     <dbl> 0, 373, 389, 245, 116, 121, 310, 551, 206, 485, 53, 1...
## $ `6/17/20`     <dbl> 0, 375, 392, 251, 118, 123, 313, 554, 208, 486, 53, 1...
## $ `6/18/20`     <dbl> 0, 400, 401, 263, 121, 130, 320, 566, 210, 501, 55, 1...
## $ `6/19/20`     <dbl> 0, 411, 413, 266, 126, 139, 320, 569, 210, 507, 58, 1...
## $ `6/20/20`     <dbl> 0, 431, 420, 272, 126, 143, 327, 572, 211, 516, 58, 1...
## $ `6/21/20`     <dbl> 0, 434, 430, 272, 127, 149, 327, 576, 213, 521, 58, 1...
## $ `6/22/20`     <dbl> 0, 442, 437, 277, 129, 153, 328, 578, 215, 528, 58, 1...
## $ `6/23/20`     <dbl> 0, 453, 450, 280, 135, 159, 329, 581, 216, 534, 58, 1...
## $ `6/24/20`     <dbl> 0, 469, 464, 288, 141, 168, 336, 584, 220, 543, 58, 1...
## $ `6/25/20`     <dbl> 0, 479, 477, 305, 149, 176, 351, 588, 233, 549, 64, 1...
## $ `6/26/20`     <dbl> 0, 488, 515, 312, 153, 184, 351, 594, 236, 559, 68, 1...
## $ `6/27/20`     <dbl> 0, 498, 555, 317, 161, 188, 358, 600, 245, 561, 69, 2...
## $ `6/28/20`     <dbl> 0, 503, 575, 317, 162, 189, 358, 602, 245, 561, 70, 2...
## $ `6/29/20`     <dbl> 0, 527, 643, 322, 165, 199, 365, 605, 269, 585, 73, 2...
## $ `6/30/20`     <dbl> 0, 537, 680, 325, 170, 208, 365, 607, 276, 590, 74, 2...
## $ `7/1/20`      <dbl> 0, 553, 703, 326, 174, 218, 367, 607, 278, 595, 77, 2...
## $ `7/2/20`      <dbl> 0, 561, 751, 335, 179, 222, 369, 610, 288, 611, 82, 2...
## $ `7/3/20`      <dbl> 0, 568, 845, 348, 189, 230, 372, 625, 330, 625, 88, 2...
## $ `7/4/20`      <dbl> 0, 591, 863, 350, 190, 234, 373, 626, 340, 637, 88, 2...
## $ `7/5/20`      <dbl> 0, 615, 881, 352, 193, 239, 373, 634, 362, 642, 100, ...
## $ `7/6/20`      <dbl> 0, 618, 911, 356, 197, 247, 373, 634, 384, 655, 105, ...
## $ `7/7/20`      <dbl> 0, 644, 997, 360, 199, 255, 373, 634, 395, 656, 106, ...
## $ `7/8/20`      <dbl> 0, 651, 1056, 366, 201, 262, 374, 639, 411, 660, 114,...
## $ `7/9/20`      <dbl> 0, 661, 1131, 371, 211, 282, 375, 646, 445, 672, 115,...
## $ `7/10/20`     <dbl> 0, 670, 1187, 381, 218, 292, 381, 648, 465, 679, 118,...
## $ `7/11/20`     <dbl> 0, 684, 1224, 398, 224, 307, 382, 654, 500, 690, 128,...
## $ `7/12/20`     <dbl> 0, 706, 1294, 403, 228, 331, 383, 655, 526, 693, 129,...
## $ `7/13/20`     <dbl> 0, 728, 1359, 413, 231, 350, 383, 660, 566, 702, 136,...
## $ `7/14/20`     <dbl> 0, 746, 1414, 428, 236, 366, 385, 661, 589, 712, 140,...
## $ `7/15/20`     <dbl> 0, 756, 1518, 441, 242, 389, 386, 664, 655, 718, 145,...
## $ `7/16/20`     <dbl> 0, 780, 1599, 459, 247, 424, 389, 669, 675, 731, 152,...
## $ `7/17/20`     <dbl> 0, 789, 1689, 463, 255, 440, 393, 672, 720, 742, 157,...
## $ `7/18/20`     <dbl> 0, 827, 1819, 483, 264, 458, 397, 678, 741, 756, 165,...
## $ `7/19/20`     <dbl> 0, 842, 1937, 495, 269, 482, 398, 686, 785, 762, 173,...
## $ `7/20/20`     <dbl> 0, 857, 2013, 503, 279, 507, 400, 689, 832, 767, 179,...
## $ `7/21/20`     <dbl> 0, 865, 2102, 514, 283, 524, 401, 695, 869, 774, 182,...
## $ `7/22/20`     <dbl> 0, 886, 2196, 518, 287, 547, 407, 701, 891, 782, 184,...
## $ `7/23/20`     <dbl> 0, 905, 2461, 534, 289, 585, 408, 706, 934, 789, 193,...
## $ `7/24/20`     <dbl> 0, 921, 2513, 539, 303, 615, 411, 711, 999, 797, 205,...
## $ `7/25/20`     <dbl> 0, 932, 2662, 552, 318, 637, 414, 720, 1062, 810, 207...
## $ `7/26/20`     <dbl> 0, 942, 2708, 562, 324, 646, 415, 724, 1113, 821, 209...
## $ `7/27/20`     <dbl> 0, 965, 2770, 569, 334, 669, 416, 730, 1194, 825, 220...
## $ `7/28/20`     <dbl> 0, 974, 2835, 575, 337, 675, 429, 734, 1243, 836, 221...
## $ `7/29/20`     <dbl> 0, 974, 2835, 575, 338, 675, 429, 734, 1244, 836, 221...
## $ `7/30/20`     <dbl> 0, 1002, 3028, 585, 352, 731, 435, 747, 1336, 848, 23...
## $ `7/31/20`     <dbl> 0, 1015, 3101, 598, 363, 767, 437, 753, 1450, 859, 23...
## $ `8/1/20`      <dbl> 0, 1030, 3142, 602, 368, 792, 443, 757, 1480, 861, 24...
## $ `8/2/20`      <dbl> 0, 1052, 3223, 610, 372, 813, 445, 765, 1580, 868, 25...
## $ `8/3/20`      <dbl> 0, 1066, 3265, 612, 382, 830, 446, 766, 1612, 875, 26...
## $ `8/4/20`      <dbl> 0, 1073, 3320, 614, 389, 836, 449, 766, 1646, 882, 26...
## $ `8/5/20`      <dbl> 0, 1073, 3380, 615, 392, 839, 452, 769, 1683, 886, 27...
## $ `8/6/20`      <dbl> 0, 1096, 3438, 619, 421, 874, 458, 771, 1741, 893, 28...
## $ `8/7/20`      <dbl> 0, 1113, 3504, 624, 424, 909, 462, 774, 1777, 899, 29...
## $ `8/8/20`      <dbl> 0, 1134, 3564, 628, 434, 923, 471, 773, 1836, 904, 29...
## $ `8/9/20`      <dbl> 0, 1215, 3606, 630, 446, 934, 472, 779, 1860, 906, 30...
## $ `8/10/20`     <dbl> 0, 1215, 3714, 631, 450, 947, 474, 782, 1883, 909, 30...
## $ `8/11/20`     <dbl> 0, 1215, 3736, 643, 455, 958, 489, 785, 1914, 916, 30...
## $ `8/12/20`     <dbl> 0, 1241, 3776, 646, 464, 967, 500, 788, 1935, 918, 31...
## $ `8/13/20`     <dbl> 0, 1250, 3813, 651, 469, 977, 501, 790, 1959, 919, 32...
## $ `8/14/20`     <dbl> 0, 1252, 3860, 656, 477, 989, 502, 796, 1975, 922, 32...
## $ `8/15/20`     <dbl> 0, 1262, 3909, 663, 483, 996, 503, 807, 2019, 925, 33...
## $ `8/16/20`     <dbl> 0, 1273, 3948, 671, 483, 1005, 504, 811, 2037, 927, 3...
## $ `8/17/20`     <dbl> 0, 1274, 3960, 672, 488, 1008, 504, 814, 2055, 928, 3...
## $ `8/18/20`     <dbl> 0, 1291, 3977, 674, 490, 1034, 512, 814, 2107, 937, 3...
## $ `8/19/20`     <dbl> 0, 1293, 4002, 683, 503, 1049, 530, 814, 2159, 941, 3...
## $ `8/20/20`     <dbl> 0, 1293, 4035, 690, 507, 1077, 534, 814, 2214, 949, 3...
## $ `8/21/20`     <dbl> 0, 1293, 4054, 690, 509, 1083, 534, 814, 2228, 952, 3...
## $ `8/22/20`     <dbl> 0, 1322, 4115, 699, 516, 1096, 536, 822, 2276, 957, 3...
## $ `8/23/20`     <dbl> 0, 1324, 4147, 702, 523, 1099, 536, 824, 2286, 958, 3...
## $ `8/24/20`     <dbl> 0, 1351, 4167, 720, 526, 1135, 536, 825, 2327, 971, 3...
## $ `8/25/20`     <dbl> 0, 1355, 4190, 724, 527, 1160, 536, 826, 2345, 973, 3...
## $ `8/26/20`     <dbl> 0, 1366, 4265, 732, 530, 1195, 537, 833, 2400, 983, 3...
## $ `8/27/20`     <dbl> 0, 1377, 4311, 739, 533, 1213, 538, 839, 2413, 1011, ...
## $ `8/28/20`     <dbl> 0, 1389, 4347, 745, 535, 1219, 541, 840, 2443, 1017, ...
## $ `8/29/20`     <dbl> 0, 1400, 4424, 753, 540, 1248, 546, 855, 2499, 1024, ...
## $ `8/30/20`     <dbl> 0, 1438, 4525, 757, 550, 1277, 550, 864, 2533, 1027, ...
## $ `8/31/20`     <dbl> 0, 1442, 4545, 757, 554, 1287, 551, 866, 2567, 1033, ...
## $ `9/1/20`      <dbl> 0, 1452, 4568, 764, 558, 1303, 559, 871, 2619, 1041, ...
## $ `9/2/20`      <dbl> 0, 1452, 4583, 768, 562, 1308, 561, 872, 2633, 1045, ...
## $ `9/3/20`      <dbl> 0, 1466, 4628, 771, 564, 1336, 563, 874, 2678, 1046, ...
## $ `9/4/20`      <dbl> 0, 1475, 4654, 776, 570, 1361, 563, 881, 2747, 1054, ...
## $ `9/5/20`      <dbl> 0, 1492, 4686, 776, 576, 1376, 566, 886, 2830, 1059, ...
## $ `9/6/20`      <dbl> 0, 1498, 4713, 777, 581, 1379, 568, 890, 2842, 1061, ...
## $ `9/7/20`      <dbl> 0, 1504, 4730, 778, 583, 1384, 568, 892, 2877, 1063, ...
## $ `9/8/20`      <dbl> 0, 1508, 4757, 778, 589, 1390, 568, 892, 2891, 1064, ...
## $ `9/9/20`      <dbl> 0, 1522, 4787, 778, 591, 1401, 570, 895, 2907, 1068, ...
## $ `9/10/20`     <dbl> 0, 1544, 4833, 785, 594, 1430, 572, 896, 2958, 1076, ...
## $ `9/11/20`     <dbl> 0, 1551, 4886, 786, 602, 1441, 573, 896, 2988, 1088, ...
## $ `9/12/20`     <dbl> 0, 1565, 4922, 792, 604, 1446, 574, 898, 3047, 1094, ...
## $ `9/13/20`     <dbl> 0, 1576, 4959, 794, 607, 1453, 580, 899, 3093, 1094, ...
## $ `9/14/20`     <dbl> 0, 1585, 4978, 801, 610, 1464, 580, 900, 3110, 1097, ...
## $ `9/15/20`     <dbl> 0, 1601, 4992, 806, 611, 1475, 581, 901, 3127, 1102, ...
## $ `9/16/20`     <dbl> 0, 1619, 5003, 809, 612, 1487, 583, 901, 3165, 1106, ...
## $ `9/17/20`     <dbl> 0, 1624, 5021, 809, 617, 1504, 585, 902, 3211, 1106, ...
## $ `9/18/20`     <dbl> 0, 1664, 5033, 824, 619, 1527, 585, 906, 3249, 1117, ...
## $ `9/19/20`     <dbl> 0, 1673, 5047, 830, 628, 1542, 585, 908, 3320, 1123, ...
## $ `9/20/20`     <dbl> 0, 1690, 5061, 835, 632, 1551, 587, 909, 3338, 1130, ...
## $ `9/21/20`     <dbl> 0, 1691, 5087, 838, 635, 1560, 591, 911, 3374, 1132, ...
## $ `9/22/20`     <dbl> 0, 1714, 5124, 848, 635, 1573, 593, 911, 3390, 1140, ...
## $ `9/23/20`     <dbl> 0, 1715, 5141, 851, 638, 1580, 597, 911, 3401, 1144, ...
## $ `9/24/20`     <dbl> 1074, 1715, 5141, 851, 638, 1580, 597, 911, 3401, 114...
## $ `9/25/20`     <dbl> 0, 1757, 5456, 873, 652, 1608, 599, 912, 3499, 1161, ...
## $ `9/26/20`     <dbl> 0, 1764, 5477, 882, 654, 1611, 604, 912, 3515, 1164, ...
## $ `9/27/20`     <dbl> 0, 1773, 5526, 885, 656, 1617, 606, 913, 3534, 1168, ...
## $ `9/28/20`     <dbl> 0, 1785, 5588, 886, 657, 1618, 607, 914, 3548, 1172, ...
## $ `9/29/20`     <dbl> 0, 1787, 5606, 886, 658, 1621, 607, 917, 3556, 1175, ...
## $ `9/30/20`     <dbl> 0, 1791, 5640, 896, 664, 1629, 610, 917, 3569, 1179, ...
## $ `10/1/20`     <dbl> 0, 1798, 5997, 898, 672, 1634, 612, 919, 3587, 1181, ...
## Observations: 811,530
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 3,195
## Variables: 258
## $ countyFIPS    <dbl> 0, 1001, 1003, 1005, 1007, 1009, 1011, 1013, 1015, 10...
## $ `County Name` <chr> "Statewide Unallocated", "Autauga County", "Baldwin C...
## $ State         <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",...
## $ stateFIPS     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ `1/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `1/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `2/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/8/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/9/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/10/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/11/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/12/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/13/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/14/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/15/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/16/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/17/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/18/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/19/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/20/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/21/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/22/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/23/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/24/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/25/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/26/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/27/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/28/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/29/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/30/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `3/31/20`     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/1/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/2/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/3/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/4/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/5/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/6/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/7/20`      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/8/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/9/20`      <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `4/10/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/11/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/12/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/13/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/14/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/15/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/16/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ `4/17/20`     <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/18/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/19/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/20/20`     <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 2, 11, 0, 0, 0, 0, 0, 0, 0, 1...
## $ `4/21/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 13, 0, 0, 0, 1, 0, 0, 0, 1...
## $ `4/22/20`     <dbl> 0, 1, 2, 0, 0, 0, 0, 0, 3, 16, 0, 0, 0, 1, 0, 1, 0, 1...
## $ `4/23/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 16, 0, 1, 0, 1, 1, 1, 0, 1...
## $ `4/24/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 17, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/25/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 0, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/26/20`     <dbl> 0, 2, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/27/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 18, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/28/20`     <dbl> 0, 3, 2, 0, 0, 0, 0, 1, 3, 19, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/29/20`     <dbl> 0, 3, 2, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `4/30/20`     <dbl> 0, 3, 3, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/1/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/2/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/3/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/4/20`      <dbl> 0, 3, 4, 1, 0, 0, 0, 1, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/5/20`      <dbl> 0, 3, 5, 1, 0, 0, 0, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/6/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/7/20`      <dbl> 0, 3, 5, 1, 0, 0, 1, 2, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/8/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 3, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/9/20`      <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/10/20`     <dbl> 0, 3, 5, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/11/20`     <dbl> 0, 3, 6, 1, 1, 0, 1, 6, 3, 21, 0, 1, 0, 1, 1, 1, 0, 2...
## $ `5/12/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 21, 0, 1, 1, 1, 1, 1, 1, 2...
## $ `5/13/20`     <dbl> 0, 3, 7, 1, 1, 0, 1, 6, 3, 22, 0, 1, 2, 1, 1, 1, 1, 2...
## $ `5/14/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 8, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/15/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/16/20`     <dbl> 0, 3, 8, 1, 1, 0, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/17/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 9, 3, 22, 0, 1, 3, 2, 2, 1, 1, 2...
## $ `5/18/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/19/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 10, 3, 22, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/20/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/21/20`     <dbl> 0, 3, 8, 1, 1, 1, 1, 11, 3, 23, 0, 1, 3, 2, 2, 1, 1, ...
## $ `5/22/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/23/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 11, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/24/20`     <dbl> 0, 3, 9, 1, 1, 1, 1, 12, 3, 23, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/25/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 12, 3, 24, 2, 1, 4, 2, 2, 1, 1, ...
## $ `5/26/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/27/20`     <dbl> 0, 3, 9, 1, 1, 1, 3, 13, 3, 24, 2, 1, 7, 2, 2, 1, 1, ...
## $ `5/28/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 15, 3, 24, 2, 1, 8, 2, 2, 1, 1, ...
## $ `5/29/20`     <dbl> 0, 3, 9, 1, 1, 1, 4, 16, 3, 24, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/30/20`     <dbl> 0, 4, 9, 1, 1, 1, 4, 17, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `5/31/20`     <dbl> 0, 4, 9, 1, 1, 1, 5, 18, 3, 25, 3, 1, 8, 2, 2, 1, 1, ...
## $ `6/1/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 25, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/2/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/3/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/4/20`      <dbl> 0, 5, 9, 1, 1, 1, 6, 18, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/5/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 21, 3, 26, 3, 1, 10, 2, 2, 1, 1,...
## $ `6/6/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/7/20`      <dbl> 0, 5, 9, 1, 1, 1, 7, 22, 3, 26, 4, 2, 10, 2, 2, 1, 1,...
## $ `6/8/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/9/20`      <dbl> 0, 5, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 10, 3, 2, 1, 1,...
## $ `6/10/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 24, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/11/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 4, 2, 11, 3, 2, 1, 1,...
## $ `6/12/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/13/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/14/20`     <dbl> 0, 6, 9, 1, 1, 1, 8, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/15/20`     <dbl> 0, 6, 9, 1, 1, 1, 9, 25, 3, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/16/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/17/20`     <dbl> 0, 7, 9, 1, 1, 1, 9, 25, 4, 26, 5, 2, 11, 3, 2, 1, 1,...
## $ `6/18/20`     <dbl> 0, 8, 9, 1, 1, 1, 9, 25, 4, 26, 5, 3, 11, 4, 2, 1, 1,...
## $ `6/19/20`     <dbl> 0, 8, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 11, 4, 2, 1, 1...
## $ `6/20/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/21/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/22/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 26, 4, 27, 6, 3, 12, 4, 2, 1, 1...
## $ `6/23/20`     <dbl> 0, 9, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 4, 2, 1, 1...
## $ `6/24/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/25/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/26/20`     <dbl> 0, 11, 9, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1, ...
## $ `6/27/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/28/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/29/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `6/30/20`     <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/1/20`      <dbl> 0, 12, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 5, 2, 1,...
## $ `7/2/20`      <dbl> 0, 13, 10, 1, 1, 1, 10, 27, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/3/20`      <dbl> 0, 13, 10, 2, 1, 1, 10, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/4/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/5/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/6/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/7/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/8/20`      <dbl> 0, 13, 10, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/9/20`      <dbl> 0, 14, 11, 2, 1, 1, 11, 28, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/10/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/11/20`     <dbl> 0, 15, 12, 2, 1, 1, 11, 29, 5, 27, 7, 3, 12, 6, 2, 1,...
## $ `7/12/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/13/20`     <dbl> 0, 16, 12, 2, 1, 1, 11, 29, 5, 30, 7, 3, 12, 6, 2, 1,...
## $ `7/14/20`     <dbl> 0, 18, 12, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/15/20`     <dbl> 0, 19, 13, 3, 2, 1, 11, 31, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/16/20`     <dbl> 0, 20, 14, 3, 2, 1, 11, 32, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/17/20`     <dbl> 0, 21, 14, 3, 2, 1, 11, 33, 6, 32, 7, 4, 12, 6, 2, 1,...
## $ `7/18/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/19/20`     <dbl> 0, 21, 15, 3, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/20/20`     <dbl> 0, 21, 15, 4, 2, 1, 11, 33, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/21/20`     <dbl> 0, 21, 16, 4, 2, 1, 11, 34, 6, 33, 7, 4, 12, 7, 2, 1,...
## $ `7/22/20`     <dbl> 0, 21, 16, 4, 2, 1, 12, 34, 6, 34, 7, 5, 12, 8, 2, 1,...
## $ `7/23/20`     <dbl> 0, 21, 17, 4, 2, 1, 12, 35, 6, 34, 7, 5, 12, 9, 2, 1,...
## $ `7/24/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/25/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/26/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 35, 6, 37, 8, 5, 12, 9, 3, 1,...
## $ `7/27/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/28/20`     <dbl> 0, 21, 18, 4, 2, 1, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/29/20`     <dbl> 0, 21, 21, 4, 2, 3, 12, 36, 6, 38, 8, 6, 12, 9, 4, 1,...
## $ `7/30/20`     <dbl> 0, 21, 21, 5, 2, 3, 12, 36, 8, 38, 8, 6, 12, 9, 5, 1,...
## $ `7/31/20`     <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/1/20`      <dbl> 0, 21, 22, 5, 2, 3, 12, 36, 9, 38, 8, 6, 12, 9, 5, 1,...
## $ `8/2/20`      <dbl> 0, 21, 23, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/3/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 9, 38, 8, 7, 12, 9, 5, 1,...
## $ `8/4/20`      <dbl> 0, 21, 24, 5, 3, 3, 12, 36, 12, 38, 8, 7, 12, 9, 5, 1...
## $ `8/5/20`      <dbl> 0, 22, 24, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/6/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 7, 12, 9, 5, 1...
## $ `8/7/20`      <dbl> 0, 22, 25, 5, 4, 3, 12, 36, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/8/20`      <dbl> 0, 22, 26, 5, 5, 4, 12, 37, 13, 38, 8, 8, 12, 9, 5, 1...
## $ `8/9/20`      <dbl> 0, 22, 27, 5, 5, 4, 12, 37, 14, 38, 8, 8, 12, 9, 5, 1...
## $ `8/10/20`     <dbl> 0, 22, 28, 5, 5, 4, 12, 37, 17, 38, 9, 9, 12, 10, 5, ...
## $ `8/11/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 19, 38, 9, 12, 12, 10, 5,...
## $ `8/12/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/13/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/14/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/15/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/16/20`     <dbl> 0, 23, 32, 6, 6, 5, 12, 37, 20, 38, 9, 12, 12, 10, 5,...
## $ `8/17/20`     <dbl> 0, 23, 32, 6, 6, 5, 14, 37, 23, 38, 9, 12, 12, 10, 5,...
## $ `8/18/20`     <dbl> 0, 23, 33, 6, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/19/20`     <dbl> 0, 23, 33, 7, 6, 5, 14, 37, 25, 38, 9, 12, 12, 10, 5,...
## $ `8/20/20`     <dbl> 0, 23, 34, 7, 6, 5, 14, 37, 25, 38, 10, 12, 12, 10, 5...
## $ `8/21/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 5...
## $ `8/22/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/23/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 25, 38, 10, 13, 12, 10, 6...
## $ `8/24/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 27, 38, 10, 13, 12, 11, 6...
## $ `8/25/20`     <dbl> 0, 23, 35, 7, 6, 6, 14, 37, 28, 39, 10, 13, 12, 11, 6...
## $ `8/26/20`     <dbl> 0, 23, 36, 7, 6, 7, 14, 37, 28, 39, 10, 13, 12, 12, 6...
## $ `8/27/20`     <dbl> 0, 23, 37, 7, 6, 7, 14, 37, 30, 39, 12, 13, 12, 13, 6...
## $ `8/28/20`     <dbl> 0, 23, 39, 7, 6, 9, 14, 37, 32, 40, 12, 13, 12, 13, 6...
## $ `8/29/20`     <dbl> 0, 23, 40, 7, 7, 9, 14, 37, 35, 40, 12, 13, 12, 14, 6...
## $ `8/30/20`     <dbl> 0, 23, 40, 7, 7, 10, 14, 37, 35, 40, 12, 13, 12, 14, ...
## $ `8/31/20`     <dbl> 0, 23, 42, 7, 8, 11, 14, 37, 36, 40, 12, 13, 12, 14, ...
## $ `9/1/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 13, 12, 14, ...
## $ `9/2/20`      <dbl> 0, 24, 42, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/3/20`      <dbl> 0, 24, 44, 7, 8, 11, 14, 37, 38, 40, 12, 14, 12, 14, ...
## $ `9/4/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/5/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/6/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/7/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/8/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/9/20`      <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 12, 16, 12, 14, ...
## $ `9/10/20`     <dbl> 0, 24, 46, 7, 9, 12, 14, 38, 38, 41, 13, 16, 12, 14, ...
## $ `9/11/20`     <dbl> 0, 24, 47, 7, 9, 12, 14, 38, 38, 41, 14, 17, 12, 15, ...
## $ `9/12/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/13/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/14/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 14, 18, 12, 16, ...
## $ `9/15/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## $ `9/16/20`     <dbl> 0, 24, 47, 7, 9, 13, 14, 38, 38, 42, 15, 24, 12, 16, ...
## $ `9/17/20`     <dbl> 0, 24, 48, 7, 9, 13, 14, 38, 39, 42, 15, 25, 12, 17, ...
## $ `9/18/20`     <dbl> 0, 24, 48, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/19/20`     <dbl> 0, 24, 49, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/20/20`     <dbl> 0, 24, 49, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/21/20`     <dbl> 0, 24, 49, 7, 10, 14, 14, 38, 39, 42, 15, 27, 12, 17,...
## $ `9/22/20`     <dbl> 0, 25, 49, 7, 10, 14, 14, 38, 40, 42, 15, 28, 12, 17,...
## $ `9/23/20`     <dbl> 0, 25, 49, 7, 10, 15, 14, 39, 40, 42, 15, 30, 12, 17,...
## $ `9/24/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 42, 42, 13, 30, 12, 16,...
## $ `9/25/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/26/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/27/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/28/20`     <dbl> 0, 25, 50, 7, 10, 15, 14, 39, 44, 42, 13, 30, 12, 16,...
## $ `9/29/20`     <dbl> 0, 27, 50, 7, 10, 15, 14, 40, 44, 42, 13, 30, 12, 16,...
## $ `9/30/20`     <dbl> 0, 27, 52, 7, 10, 15, 15, 40, 44, 42, 13, 30, 12, 16,...
## $ `10/1/20`     <dbl> 0, 28, 53, 7, 10, 15, 16, 40, 44, 42, 13, 30, 12, 16,...
## Observations: 811,530
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

# STEP 3: Build the filtered county data set (counties below minPop are
# excluded); createClusters=FALSE means no new clusters are generated here
clust_20201003 <- prepClusterCounties(
  burdenFile = burden_20201003,
  maxDate = maxDate_20201003,
  minPop = 25000,
  createClusters = FALSE
)
## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0214              0          0.179          0.0761
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>
# STEP 4: Evaluate the previously created county clusters
# (clustVec_county_20200903_new) against the refreshed data; showCum=TRUE
# also requests the cumulative plot
helperACC_county_20201003 <- helperAssessCountyClusters(
  vecCluster = clustVec_county_20200903_new,
  dfPop = clust_20201003$countyFiltered,
  dfBurden = clust_20201003$countyFiltered,
  showCum = TRUE,
  thruLabel = "Sep 30, 2020",
  plotsTogether = TRUE,
  orderCluster = TRUE
)

## 
## Recency is defined as 2020-09-01 through current
## 
## Recency is defined as 2020-09-01 through current
## Warning: Removed 1 rows containing missing values (geom_point).

# STEP 5: Restore the clusters excluded from the analysis (code 999) together
# with their associated disease data
clusterStateData_20201003 <- helperMakeClusterStateData(
  helperACC_county_20201003,
  dfBurden = clust_20201003$countyDailyPerCapita,
  orderCluster = TRUE
)
## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"
# STEP 6: Example state-level summary for a handful of the largest states
stateCountySummary(
  states = c("CA", "TX", "FL", "NY", "IL", "PA"),
  df = changeOrderLabel(clusterStateData_20201003, grpVars = "fipsCounty"),
  keyDate = maxDate_20201003,
  showQuadrants = TRUE,
  showCumulative = TRUE,
  facetCumulativeByState = TRUE,
  showAllFactorLevels = TRUE
)

# STEP 7: Examine lags for the largest counties and for the counties with the
# highest death rates
# STEP 7a: Keep the data for the top 100 counties by death rate (dpm),
# restricted to counties with population of 100k or more; no plots requested
top100_dpm_20201003 <- exploreTopCounties(
  helperACC_county_20201003,
  minDate = "2020-03-01",
  maxDate = "2020-09-30",
  minPop = 100000,
  nVar = "dpm",
  nKey = 100,
  plotData = FALSE
)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 100 x 5
##    state cluster countyName          dpm     pop
##    <chr> <fct>   <chr>             <dbl>   <dbl>
##  1 36005 3       3 - Bronx (NY)    3492. 1418207
##  2 36081 3       3 - Queens (NY)   3218. 2253858
##  3 36047 3       3 - Kings (NY)    2863. 2559903
##  4 34013 3       3 - Essex (NJ)    2663.  798975
##  5 34031 3       3 - Passaic (NJ)  2499.  501826
##  6 34039 3       3 - Union (NJ)    2437.  556341
##  7 36085 3       3 - Richmond (NY) 2281.  476143
##  8 34017 3       3 - Hudson (NJ)   2256.  672391
##  9 34003 3       3 - Bergen (NJ)   2194.  932202
## 10 48061 3       3 - Cameron (TX)  2155.  423163
## # ... with 90 more rows
## 
## The best lags are:
## # A tibble: 100 x 5
##    countyName            state cluster  corr   lag
##    <chr>                 <chr> <fct>   <dbl> <int>
##  1 3 - Hampden (MA)      25013 3       0.924     0
##  2 3 - San Juan (NM)     35045 3       0.836     0
##  3 4 - Baltimore cityMD) 24510 4       0.535     0
##  4 4 - Coconino (AZ)     04005 4       0.265     0
##  5 4 - Hendricks (IN)    18063 4       0.467     0
##  6 4 - Johnson (IN)      18081 4       0.527     0
##  7 4 - Marion (IN)       18097 4       0.544     0
##  8 4 - Montgomery (AL)   01101 4       0.761     0
##  9 4 - Montgomery (MD)   24031 4       0.895     0
## 10 4 - Oakland (MI)      26125 4       0.478     0
## # ... with 90 more rows
# Inspect the structure of the saved top-100 data
top100_dpm_20201003 %>% glimpse()
## Observations: 21,200
## Variables: 9
## Groups: countyName, state, cluster [100]
## $ countyName <chr> "1 - Anderson (SC)", "1 - Highlands (FL)", "1 - Webb (TX...
## $ state      <chr> "45007", "12055", "48479", "34003", "36005", "48061", "2...
## $ cluster    <fct> 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,...
## $ date       <date> 2020-03-01, 2020-03-01, 2020-03-01, 2020-03-01, 2020-03...
## $ cpm7       <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.000000...
## $ dpm7       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ corr       <dbl> 0.9199305, 0.7872620, 0.8755114, 0.8252094, 0.9747650, 0...
## $ lag        <int> 26, 28, 30, 9, 6, 11, 8, 7, 9, 0, 2, 8, 26, 8, 6, 8, 10,...
## $ cpmlag     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, N...
# STEP 7b: Three most populous counties within each cluster, with plots
exploreTopCounties(
  helperACC_county_20201003,
  minDate = "2020-03-01",
  maxDate = "2020-09-30",
  topNBy = "cluster",
  nVar = "pop",
  nKey = 3,
  plotData = TRUE
)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 15 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  655. 10039107
##  2 17031 4       4 - Cook (IL)        1015.  5150233
##  3 48201 1       1 - Harris (TX)       551.  4713325
##  4 04013 4       4 - Maricopa (AZ)     756.  4485414
##  5 06073 2       2 - San Diego (CA)    235.  3338330
##  6 06059 1       1 - Orange (CA)       399.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1207.  2716940
##  8 48113 1       1 - Dallas (TX)       430.  2635516
##  9 36047 3       3 - Kings (NY)       2863.  2559903
## 10 36081 3       3 - Queens (NY)      3218.  2253858
## 11 53033 5       5 - King (WA)         337.  2252782
## 12 06085 2       2 - Santa Clara (CA)  167.  1927852
## 13 26163 3       3 - Wayne (MI)       1706.  1749343
## 14 06001 2       2 - Alameda (CA)      247.  1671329
## 15 39049 5       5 - Franklin (OH)     467.  1316756

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName           state cluster  corr   lag
##    <chr>                <chr> <fct>   <dbl> <int>
##  1 5 - Franklin (OH)    39049 5       0.218     0
##  2 5 - Los Angeles (CA) 06037 5       0.616     0
##  3 2 - San Diego (CA)   06073 2       0.646     2
##  4 5 - King (WA)        53033 5       0.198     2
##  5 4 - Cook (IL)        17031 4       0.836     5
##  6 3 - Kings (NY)       36047 3       0.965     8
##  7 3 - Queens (NY)      36081 3       0.941     8
##  8 1 - Dallas (TX)      48113 1       0.782     9
##  9 3 - Wayne (MI)       26163 3       0.844    10
## 10 1 - Harris (TX)      48201 1       0.740    13
## 11 4 - Maricopa (AZ)    04013 4       0.965    18
## 12 1 - Orange (CA)      06059 1       0.757    24
## 13 2 - Alameda (CA)     06001 2       0.727    24
## 14 2 - Santa Clara (CA) 06085 2       0.487    30
## 15 4 - Miami-Dade (FL)  12086 4       0.785    30
## Warning: Removed 26 rows containing missing values (geom_path).
## Warning: Removed 26 rows containing missing values (geom_path).

## # A tibble: 3,180 x 9
## # Groups:   countyName, state, cluster [15]
##    countyName           state cluster date         cpm7  dpm7  corr   lag cpmlag
##    <chr>                <chr> <fct>   <date>      <dbl> <dbl> <dbl> <int>  <dbl>
##  1 1 - Dallas (TX)      48113 1       2020-03-01 0          0 0.782     9     NA
##  2 1 - Harris (TX)      48201 1       2020-03-01 0          0 0.740    13     NA
##  3 1 - Orange (CA)      06059 1       2020-03-01 0.0900     0 0.757    24     NA
##  4 2 - Alameda (CA)     06001 2       2020-03-01 0.0855     0 0.727    24     NA
##  5 2 - San Diego (CA)   06073 2       2020-03-01 0          0 0.646     2     NA
##  6 2 - Santa Clara (CA) 06085 2       2020-03-01 1.26       0 0.487    30     NA
##  7 3 - Kings (NY)       36047 3       2020-03-01 0          0 0.965     8     NA
##  8 3 - Queens (NY)      36081 3       2020-03-01 0          0 0.941     8     NA
##  9 3 - Wayne (MI)       26163 3       2020-03-01 0          0 0.844    10     NA
## 10 4 - Cook (IL)        17031 4       2020-03-01 0.0555     0 0.836     5     NA
## # ... with 3,170 more rows
# STEP 7c: Five counties with the highest dpm within each cluster, with plots
exploreTopCounties(
  helperACC_county_20201003,
  minDate = "2020-03-01",
  maxDate = "2020-09-30",
  topNBy = "cluster",
  nVar = "dpm",
  nKey = 5,
  plotData = TRUE
)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 25 x 5
##    state cluster countyName            dpm     pop
##    <chr> <fct>   <chr>               <dbl>   <dbl>
##  1 36005 3       3 - Bronx (NY)      3492. 1418207
##  2 36081 3       3 - Queens (NY)     3218. 2253858
##  3 36047 3       3 - Kings (NY)      2863. 2559903
##  4 34013 3       3 - Essex (NJ)      2663.  798975
##  5 34031 3       3 - Passaic (NJ)    2499.  501826
##  6 48215 4       4 - Hidalgo (TX)    1755.  868707
##  7 04027 4       4 - Yuma (AZ)       1614.  213787
##  8 22017 4       4 - Caddo (LA)      1503.  240204
##  9 44007 4       4 - Providence (RI) 1398.  638931
## 10 45041 4       4 - Florence (SC)   1381.  138293
## # ... with 15 more rows

## 
## The best lags are:
## # A tibble: 25 x 5
##    countyName          state cluster    corr   lag
##    <chr>               <chr> <fct>     <dbl> <int>
##  1 2 - Pulaski (AR)    05119 2        0.558      0
##  2 5 - Columbiana (OH) 39029 5       -0.0149     0
##  3 5 - Litchfield (CT) 09005 5        0.922      4
##  4 4 - Caddo (LA)      22017 4        0.210      5
##  5 3 - Bronx (NY)      36005 3        0.975      6
##  6 3 - Essex (NJ)      34013 3        0.876      7
##  7 3 - Kings (NY)      36047 3        0.965      8
##  8 3 - Queens (NY)     36081 3        0.941      8
##  9 4 - Florence (SC)   45041 4        0.856      8
## 10 3 - Passaic (NJ)    34031 3        0.810      9
## # ... with 15 more rows
## Warning: Removed 158 rows containing missing values (geom_path).
## Warning: Removed 158 rows containing missing values (geom_path).

## # A tibble: 5,300 x 9
## # Groups:   countyName, state, cluster [25]
##    countyName         state cluster date        cpm7  dpm7  corr   lag cpmlag
##    <chr>              <chr> <fct>   <date>     <dbl> <dbl> <dbl> <int>  <dbl>
##  1 1 - Anderson (SC)  45007 1       2020-03-01     0     0 0.920    26     NA
##  2 1 - Highlands (FL) 12055 1       2020-03-01     0     0 0.787    28     NA
##  3 1 - Marion (FL)    12083 1       2020-03-01     0     0 0.904    27     NA
##  4 1 - Newton (GA)    13217 1       2020-03-01     0     0 0.781    26     NA
##  5 1 - Webb (TX)      48479 1       2020-03-01     0     0 0.876    30     NA
##  6 2 - Cabarrus (NC)  37025 2       2020-03-01     0     0 0.420    20     NA
##  7 2 - DeKalb (IL)    17037 2       2020-03-01     0     0 0.428    15     NA
##  8 2 - Douglas (NE)   31055 2       2020-03-01     0     0 0.539    18     NA
##  9 2 - Pulaski (AR)   05119 2       2020-03-01     0     0 0.558     0      0
## 10 2 - Warren (KY)    21227 2       2020-03-01     0     0 0.596    16     NA
## # ... with 5,290 more rows

CDC All-Cause Deaths

The CDC maintain public data for all-cause deaths by week and jurisdiction, available at CDC Weekly Deaths by Jurisdiction.

These data are known to have a lag between death and reporting, and the CDC back-correct to report deaths at the time the death occurred even if the death is reported in following weeks. This means totals for recent weeks tend to run low (lag), and the CDC run a projection of the expected total number of deaths given the historical lag times. Other analysts on the internet report that there is currently significant "supra-lag": lag times are much longer than the historical averages, which causes the CDC projected deaths for recent weeks to be low.

Key Functions (CDC All-Cause Deaths)

Functions for reading and analyzing CDC all-cause deaths data have been sourced above from Coronavirus_Statistics_Functions_v002.R.

Running Key Analyses (CDC All-Cause Deaths)

The CDC data can be loaded and analyzed:

# Load the downloaded CDC weekly all-cause deaths file and process it
# NOTE(review): weekThru=30 presumably caps the weeks used - confirm in readProcessCDC
cdc20200923 <- readProcessCDC(
  "Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20200923.csv",
  weekThru = 30
)
## Observations: 174,311
## Variables: 11
## $ Jurisdiction         <chr> "Alabama", "Alabama", "Alabama", "Alabama", "A...
## $ `Week Ending Date`   <chr> "1/10/2015", "1/17/2015", "1/24/2015", "1/31/2...
## $ `State Abbreviation` <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"...
## $ Year                 <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ Week                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ `Age Group`          <chr> "25-44 years", "25-44 years", "25-44 years", "...
## $ `Number of Deaths`   <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50...
## $ `Time Period`        <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2...
## $ Type                 <chr> "Predicted (weighted)", "Predicted (weighted)"...
## $ Suppress             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 174,311
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <chr> "25-44 years", "25-44 years", "25-44 years", "25-44 ye...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2019", "2...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Check Control Levels and Record Counts for Renamed Data:
## # A tibble: 6 x 4
##   age                    n n_deaths_na   deaths
##   <chr>              <int>       <int>    <dbl>
## 1 25-44 years        26066           3  3170502
## 2 45-64 years        31884           7 12464677
## 3 65-74 years        31880          10 12347549
## 4 75-84 years        31902          11 15363924
## 5 85 years and older 31888           9 20067928
## 6 Under 25 years     20691           0  1372225
## # A tibble: 12 x 6
## # Groups:   period, year [6]
##    period     year type                     n n_deaths_na  deaths
##    <chr>     <int> <chr>                <int>       <int>   <dbl>
##  1 2015-2019  2015 Predicted (weighted) 15285           0 5416393
##  2 2015-2019  2015 Unweighted           15285           0 5416393
##  3 2015-2019  2016 Predicted (weighted) 15365           0 5483764
##  4 2015-2019  2016 Unweighted           15365           0 5483764
##  5 2015-2019  2017 Predicted (weighted) 15318           0 5643350
##  6 2015-2019  2017 Unweighted           15318           0 5643350
##  7 2015-2019  2018 Predicted (weighted) 15305           0 5698002
##  8 2015-2019  2018 Unweighted           15305           0 5698002
##  9 2015-2019  2019 Predicted (weighted) 15319           0 5725516
## 10 2015-2019  2019 Unweighted           15319           0 5725516
## 11 2020       2020 Predicted (weighted) 10586          24 4476340
## 12 2020       2020 Unweighted           10541          16 4376415
## # A tibble: 3 x 5
## # Groups:   period [2]
##   period    Suppress                                       n n_deaths_na  deaths
##   <chr>     <chr>                                      <int>       <int>   <dbl>
## 1 2015-2019 <NA>                                      153184           0  5.59e7
## 2 2020      Suppressed (counts highly incomplete, <5~     40          40  0.    
## 3 2020      <NA>                                       21087           0  8.85e6
## # A tibble: 9 x 5
## # Groups:   period [2]
##   period   Note                                            n n_deaths_na  deaths
##   <chr>    <chr>                                       <int>       <int>   <dbl>
## 1 2015-20~ <NA>                                       153184           0  5.59e7
## 2 2020     Data in recent weeks are incomplete. Only~  16591           0  7.27e6
## 3 2020     Data in recent weeks are incomplete. Only~    324           0  1.52e5
## 4 2020     Data in recent weeks are incomplete. Only~    288          30  3.05e4
## 5 2020     Data in recent weeks are incomplete. Only~   1502          10  4.17e5
## 6 2020     Data in recent weeks are incomplete. Only~     60           0  2.71e4
## 7 2020     Estimates for Pennsylvania are too low fo~     48           0  2.23e4
## 8 2020     Weights may be too low to account for und~    436           0  1.40e5
## 9 2020     <NA>                                         1878           0  7.97e5
##    state         Jurisdiction    n n_deaths_na   deaths
## 1     US        United States 3552           0 32276762
## 2     CA           California 3552           0  3072016
## 3     FL              Florida 3552           0  2357528
## 4     TX                Texas 3552           0  2314502
## 5     PA         Pennsylvania 3552           0  1548716
## 6     OH                 Ohio 3552           0  1395014
## 7     IL             Illinois 3552           0  1219119
## 8     NY             New York 3552           0  1155790
## 9     MI             Michigan 3552           0  1111203
## 10    NC       North Carolina 3521          17  1051632
## 11    GA              Georgia 3551           0   965504
## 12    NJ           New Jersey 3546           0   867210
## 13    TN            Tennessee 3552           0   840787
## 14    VA             Virginia 3552           0   774383
## 15    IN              Indiana 3550           0   749760
## 16    MO             Missouri 3548           0   728220
## 17    MA        Massachusetts 3516           0   685409
## 18    AZ              Arizona 3552           0   684537
## 19    YC        New York City 3548           0   671106
## 20    WA           Washington 3551           0   645406
## 21    AL              Alabama 3550           0   598526
## 22    WI            Wisconsin 3533           0   592047
## 23    MD             Maryland 3546           0   570238
## 24    SC       South Carolina 3549           0   560415
## 25    KY             Kentucky 3519           0   545032
## 26    LA            Louisiana 3545           0   525668
## 27    MN            Minnesota 3509           0   503567
## 28    CO             Colorado 3550           0   446708
## 29    OK             Oklahoma 3541           0   445362
## 30    OR               Oregon 3382           0   413553
## 31    MS          Mississippi 3488           0   363792
## 32    AR             Arkansas 3444           0   361612
## 33    CT          Connecticut 3106          13   356416
## 34    IA                 Iowa 3190           0   339791
## 35    PR          Puerto Rico 3272           0   331654
## 36    KS               Kansas 3246           0   296520
## 37    NV               Nevada 3291           0   289275
## 38    WV        West Virginia 3011          10   251046
## 39    UT                 Utah 3438           0   213931
## 40    NM           New Mexico 3140           0   205026
## 41    NE             Nebraska 2846           0   188492
## 42    ME                Maine 2646           0   160341
## 43    ID                Idaho 2766           0   152936
## 44    NH        New Hampshire 2672           0   135110
## 45    HI               Hawaii 2556           0   124379
## 46    RI         Rhode Island 2474           0   114274
## 47    MT              Montana 2556           0   109719
## 48    DE             Delaware 2558           0    99625
## 49    SD         South Dakota 2448           0    86461
## 50    ND         North Dakota 2433           0    75079
## 51    DC District of Columbia 2545           0    64202
## 52    VT              Vermont 2336           0    61925
## 53    WY              Wyoming 2318           0    47230
## 54    AK               Alaska 2352           0    42249

## Observations: 174,311
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 85,466
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Confirm that data suppressions and NA deaths have been aliminated:
## # A tibble: 0 x 11
## # ... with 11 variables: Jurisdiction <chr>, weekEnding <date>, state <chr>,
## #   year <fct>, week <int>, age <fct>, deaths <dbl>, period <fct>, type <chr>,
## #   Suppress <chr>, Note <chr>
## Observations: 80,379
## Variables: 12
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-10, 2015-01-10, 2015-01-10, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ...
## $ age          <fct> Under 25 years, 25-44 years, 45-64 years, 65-74 years,...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ n            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ deaths       <dbl> 25, 67, 253, 202, 272, 320, 28, 49, 256, 222, 253, 332...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## First duplicate is in row number (0 means no duplicates): 0
# Produce the basic plots for the processed CDC all-cause deaths data
# clustVec is taken from the useClusters element of the test_hier5 run
# NOTE(review): presumably a state-to-cluster assignment vector - confirm in the functions file
cdcBasicPlots(
    cdc20200923, 
    clustVec=test_hier5$useClusters
)

# Cohort analysis example: restrict to the three age buckets covering ages 65 and over
# Uses 2015 as the start year, 2020 as the current year, and week 9 as the start week
list_65plus <- cdcCohortAnalysis(
    df=cdc20200923,
    cohortName="65+ years old", 
    plotTitle="All-cause deaths for 65+ years old cohort", 
    critFilter=list("age"=c("65-74 years", "75-84 years", "85 years and older")), 
    startYear=2015,
    curYear=2020, 
    startWeek=9,
    predActualPlotsOnePage=TRUE
)

# Cohort analysis example: no critFilter is supplied, so the cohort is described as all ages, all states
# Uses 2015 as the start year, 2020 as the current year, and week 9 as the start week
list_allUS <- cdcCohortAnalysis(
    df=cdc20200923,
    cohortName="all ages, all states", 
    plotTitle="All-cause US total deaths",
    startYear=2015,
    curYear=2020, 
    startWeek=9,
    predActualPlotsOnePage=TRUE
)

# Cohort analysis example: restrict to a handful of states that were hit early (NY, NJ, CT, MA)
# Uses 2015 as the start year, 2020 as the current year, and week 9 as the start week
list_early <- cdcCohortAnalysis(
    df=cdc20200923,
    cohortName="all ages, NY/NJ/CT/MA", 
    plotTitle="All-cause total deaths for NY/NJ/CT/MA",
    critFilter=list("state"=c("NY", "NJ", "CT", "MA")),
    startYear=2015,
    curYear=2020, 
    startWeek=9,
    predActualPlotsOnePage=TRUE
)

Next steps are to continue with the CDC aggregates function (analyze by cluster, age, state, etc.):

# Test of the CDC aggregation function at the cluster level
# TODO: plot chart titles still need to be fixed

# Step 1: derive the cluster-level inputs from the test_hier5_201001 run
# (the stateCluster, pop, and deaths elements are consumed in step 2)
clusterList_hier5_201001 <- helperKeyStateClusterMetrics(test_hier5_201001)

# Step 2: aggregate the CDC data on the state variable, labelling the five subsets as "cluster 1" to "cluster 5"
clusterAgg_20200923 <- cdcAggregateSummary(
    df=cdc20200923, 
    idVarName="cluster",
    critVar="state", 
    critSubsets=clusterList_hier5_201001$stateCluster,
    critListNames=paste0("cluster ", 1:5),
    factorCritList=FALSE,
    startWeek=9, 
    popData=clusterList_hier5_201001$pop,
    cvDeathData=clusterList_hier5_201001$deaths
)

# Test of the CDC aggregation function at the state level, with showAllPlots switched off
# The names of clusterList_hier5_201001$clData serve as both the subsets and the sub-list names
stateAgg_20200923 <- cdcAggregateSummary(
    df=cdc20200923, 
    idVarName="state", 
    critVar="state", 
    critSubsets=names(clusterList_hier5_201001$clData),
    subListNames=names(clusterList_hier5_201001$clData),
    startWeek=9, 
    showAllPlots=FALSE
)


# Map each epiweek to a single month and quarter, based on the 2020 calendar
# Every day of 2020 (366 days) is tagged with its epiweek, month, and quarter
# Each epiweek is then assigned the month (and its quarter) that contains the most of its days
epiMonth <- tibble::tibble(dt=seq(as.Date("2020-01-01"), by="day", length.out=366)) %>%
    mutate(ew=lubridate::epiweek(dt), 
           month=lubridate::month(dt), 
           quarter=lubridate::quarter(dt)
           ) %>%
    count(ew, month, quarter) %>%
    # Sort so that the month with the most days is the first row within each epiweek
    arrange(ew, -n) %>%
    group_by(ew) %>%
    summarize(month=factor(month.abb[month[1]], levels=month.abb), quarter=quarter[1])

# State-level excess death plots built from the state aggregation
# The epiweek-to-month mapping and the consolidated plot data from test_hier5_201001 are passed in as inputs
# NOTE(review): cvDeaths is presumably the coronavirus deaths data used for comparison - confirm in the functions file
helperKeyStateExcessPlots(
    df=stateAgg_20200923, 
    epiMonth=epiMonth,
    startWeek=10,
    cvDeaths=test_hier5_201001$consolidatedPlotData,
    cvDeathDate=as.Date("2020-07-31"),
    subT="CDC data through July 2020 (Q3 incomplete)"
)
## Warning: Column `state` joining factor and character vector, coercing into
## character vector

## Joining, by = "state"

## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).

## # A tibble: 357 x 7
##    state quarter month postStart  excess     pop excesspm
##    <chr> <fct>   <fct>     <dbl>   <dbl>   <dbl>    <dbl>
##  1 AK    Q1-2020 Jan           0   16.3   738432    22.1 
##  2 AK    Q1-2020 Feb           0    9.85  738432    13.3 
##  3 AK    Q1-2020 Mar           1  -21.6   738432   -29.2 
##  4 AK    Q2-2020 Apr           1   -9.29  738432   -12.6 
##  5 AK    Q2-2020 May           1   28.0   738432    38.0 
##  6 AK    Q2-2020 Jun           1   16.4   738432    22.3 
##  7 AK    Q3-2020 Jul           1    5.85  738432     7.92
##  8 AL    Q1-2020 Jan           0 -527.   4858979  -108.  
##  9 AL    Q1-2020 Feb           0  -84.7  4858979   -17.4 
## 10 AL    Q1-2020 Mar           1  -74.5  4858979   -15.3 
## # ... with 347 more rows
# Test of the CDC aggregation function at the age-bucket level (called with showAllPlots=TRUE)
# The factor levels of cdc20200923$age serve as both the subsets and the sub-list names
ageAgg_20200923 <- cdcAggregateSummary(
    df=cdc20200923, 
    idVarName="age", 
    critVar="age", 
    critSubsets=levels(cdc20200923$age),
    subListNames=levels(cdc20200923$age),
    startWeek=9, 
    showAllPlots=TRUE
)

# Estimated 2020 US population by single year of age
# The "2020" slice of survival::uspop2 is summed across its second dimension, leaving one total per row
# The row names of that slice are converted to integers to become ageActual
usPopAge2020 <- local({
    popByAge <- apply(survival::uspop2[, , "2020"], 1, FUN=sum)
    tibble::tibble(ageActual=as.integer(names(popByAge)), pop_2020=popByAge)
})
usPopAge2020
## # A tibble: 101 x 2
##    ageActual pop_2020
##        <int>    <int>
##  1         0  4583634
##  2         1  4575533
##  3         2  4567477
##  4         3  4561292
##  5         4  4557600
##  6         5  4555265
##  7         6  4552366
##  8         7  4548108
##  9         8  4542047
## 10         9  4534181
## # ... with 91 more rows
# Total 2020 population for each of the age buckets used in the CDC data
# Ages are assigned to buckets first, then converted to a factor with the same levels as cdc20200923$age
usPopBucket2020 <- usPopAge2020 %>%
    mutate(age=case_when(ageActual < 25 ~ "Under 25 years", 
                         ageActual < 45 ~ "25-44 years", 
                         ageActual < 65 ~ "45-64 years", 
                         ageActual < 75 ~ "65-74 years", 
                         ageActual < 85 ~ "75-84 years", 
                         TRUE ~ "85 years and older"
                         )
           ) %>%
    mutate(age=factor(age, levels=levels(cdc20200923$age))) %>%
    group_by(age) %>%
    summarize(pop=sum(pop_2020))
usPopBucket2020
## # A tibble: 6 x 2
##   age                      pop
##   <fct>                  <int>
## 1 Under 25 years     112501697
## 2 25-44 years         89724301
## 3 45-64 years         84356197
## 4 65-74 years         32312186
## 5 75-84 years         15895265
## 6 85 years and older   6597019
# Age-level excess death plots built from the age aggregation
# The epiweek-to-month mapping, the bucketed 2020 population, and the consolidated plot data are passed in as inputs
# NOTE(review): cvDeaths is presumably the coronavirus deaths data used for comparison - confirm in the functions file
helperKeyAgeExcessPlots(
    df=ageAgg_20200923, 
    epiMonth=epiMonth,
    popData=usPopBucket2020,
    startWeek=10,
    cvDeaths=test_hier5_201001$consolidatedPlotData,
    cvDeathDate=as.Date("2020-07-31"),
    subT="CDC data through July 2020 (Q3 incomplete)"
)

## # A tibble: 42 x 7
##    age            quarter month postStart excess       pop excesspm
##    <fct>          <fct>   <fct>     <dbl>  <dbl>     <int>    <dbl>
##  1 Under 25 years Q1-2020 Jan           0  249.  112501697    2.21 
##  2 Under 25 years Q1-2020 Feb           0  113.  112501697    1.00 
##  3 Under 25 years Q1-2020 Mar           1  -72.4 112501697   -0.644
##  4 Under 25 years Q2-2020 Apr           1  154.  112501697    1.37 
##  5 Under 25 years Q2-2020 May           1  216.  112501697    1.92 
##  6 Under 25 years Q2-2020 Jun           1  468.  112501697    4.16 
##  7 Under 25 years Q3-2020 Jul           1  519.  112501697    4.61 
##  8 25-44 years    Q1-2020 Jan           0  337.   89724301    3.76 
##  9 25-44 years    Q1-2020 Feb           0  236.   89724301    2.63 
## 10 25-44 years    Q1-2020 Mar           1  851.   89724301    9.48 
## # ... with 32 more rows
# Collapse the CDC data to two age groups (elderly and non-elderly), using 65 as the start of elderly
# Deaths are summed by state, year, week, and age group
# keyVar combines age group and state (separated by "_") so each combination can be treated as one unit
ageOld <- c("65-74 years", "75-84 years", "85 years and older")
cdcTwoAge <- cdc20200923 %>%
    mutate(ageSplit=case_when(age %in% ageOld ~ "65 and over", 
                              TRUE ~ "Under 65"
                              )
           ) %>%
    mutate(ageSplit=factor(ageSplit, levels=c("Under 65", "65 and over"))) %>%
    group_by(state, year, week, ageSplit) %>%
    summarize(deaths=sum(deaths)) %>%
    ungroup() %>%
    mutate(keyVar=paste(ageSplit, state, sep="_"))
cdcTwoAge
## # A tibble: 29,552 x 6
##    state year   week ageSplit    deaths keyVar        
##    <chr> <fct> <int> <fct>        <dbl> <chr>         
##  1 AK    2015      1 Under 65        22 Under 65_AK   
##  2 AK    2015      1 65 and over     40 65 and over_AK
##  3 AK    2015      2 Under 65        22 Under 65_AK   
##  4 AK    2015      2 65 and over     55 65 and over_AK
##  5 AK    2015      3 Under 65        19 Under 65_AK   
##  6 AK    2015      3 65 and over     53 65 and over_AK
##  7 AK    2015      4 Under 65        17 Under 65_AK   
##  8 AK    2015      4 65 and over     51 65 and over_AK
##  9 AK    2015      5 Under 65        41 Under 65_AK   
## 10 AK    2015      5 65 and over     33 65 and over_AK
## # ... with 29,542 more rows
# Test of the CDC aggregation function by age group and state combined, with showAllPlots switched off
# Each unique keyVar (age group and state joined by "_") is its own subset
# After aggregation, keyVar is split back into separate age and state columns
ageStateAgg_20200923 <- cdcAggregateSummary(
    df=cdcTwoAge, 
    idVarName="keyVar", 
    critVar="keyVar", 
    critSubsets=unique(cdcTwoAge$keyVar),
    subListNames=unique(cdcTwoAge$keyVar),
    startWeek=9, 
    showAllPlots=FALSE
) %>%
    tidyr::separate(keyVar, into=c("age", "state"), sep="_")

# Plot of the share of all-cause excess deaths by age cohort within each state
# Rows are limited to 2020 from week 10 onward, and delta is summed by age and state
# NOTE(review): delta is presumably actual minus predicted deaths from cdcAggregateSummary() - confirm
ageStateAgg_20200923 %>%
    filter(year==2020, week>=10) %>%
    group_by(age, state) %>%
    summarize(excess=sum(delta)) %>%
    # Drop the leftover grouping by age so that downstream steps see an ungrouped tibble
    ungroup() %>%
    # position="fill" rescales every bar to a constant total, so the units of y do not matter
    # (the previous division by 1000 had no effect on the chart and has been dropped)
    ggplot(aes(x=fct_reorder(state, excess, .fun=sum), y=excess, fill=age)) + 
    geom_col(position="fill") + 
    coord_flip() + 
    # Show the proportions as percentages so the tick labels agree with the "%" axis title
    scale_y_continuous(labels=function(x) paste0(100*x, "%")) + 
    labs(x="State", 
         y="% of All-cause excess deaths", 
         title="Proportion of all-cause excess deaths by age cohort in 2020", 
         subtitle="CDC data March 2020 through July 2020"
         ) +
    scale_fill_discrete("Age Group") + 
    # Reference line at 25%, set as a constant so that it is drawn once rather than once per data row
    geom_hline(yintercept=0.25, lty=2)

# Cohort analysis for each level of ageSplit in turn
# Only the predicted vs. actual plots are requested (on one page); subset plots are switched off
# The return values are not kept, so the result of the iteration is discarded
invisible(lapply(levels(cdcTwoAge$ageSplit), function(ageGrp) {
    cdcCohortAnalysis(
        df=cdcTwoAge, 
        critFilter=list("ageSplit"=ageGrp),
        cohortName=paste0("ages ", ageGrp), 
        plotTitle=paste0("All-cause total deaths for age ", ageGrp), 
        showSubsetPlots=FALSE,
        showPredActualPlots=TRUE,
        predActualPlotsOnePage=TRUE
    )
}))

Next steps are to further clean up and integrate the functions so that the full process can be run with more recent data using fewer changes to the code.

Running the Full Process

First, data are downloaded from COVID Tracking Project (run only once) without creating clusters (this chunk is set to eval=FALSE to avoid over-writing previously downloaded data):

# Download the latest COVID Tracking Project data to a local CSV (intended to be run only once)
# The raw data from test_hier5_201001 is the comparison file, and its cluster assignments are passed as useClusters
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201010.csv"
test_hier5_201010 <- readRunCOVIDTrackingProject(
    thruLabel="Oct 9, 2020", 
    downloadTo=locDownload, 
    useClusters=test_hier5_201001$useClusters,
    compareFile=test_hier5_201001$dfRaw
)

The segments are recreated from the downloaded data so that evolution in segment type and membership can be explored:

# Read the previously downloaded file and run hierarchical clustering so the dendrogram can be explored
# No kCut is supplied in this call, and the assessment plots are skipped
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201010.csv"
test_hier_201010_dendonly <- readRunCOVIDTrackingProject(
    thruLabel="Oct 9, 2020", 
    readFrom=locDownload, 
    compareFile=test_hier5_201001$dfRaw,
    hierarchical=TRUE, 
    skipAssessmentPlots=TRUE,
    minShape=3, 
    minDeath=100, 
    minCase=10000, 
    ratioDeathvsCase = 5, 
    ratioTotalvsShape = 0.5
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7623536        205470               8114969
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: probableCases
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-10-09 2020-10-08 2020-10-07 2020-10-06 2020-10-05 2020-10-04 2020-10-03 2020-10-02 2020-10-01
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
## [1] date     name     newValue oldValue
## <0 rows> (or 0-length row.names)
## Joining, by = c("date", "name")
## Warning: Removed 9 rows containing missing values (geom_path).
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state             name newValue oldValue
## 1    HI positiveIncrease    12469    12289
## Observations: 12,309
## Variables: 55
## $ date                        <date> 2020-10-09, 2020-10-09, 2020-10-09, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 10176, 163465, 90145, 0, 224084, 838606...
## $ probableCases               <dbl> NA, 19565, 4165, NA, 4837, NA, 5226, 24...
## $ negative                    <dbl> 482894, 1052552, 1025546, 1616, 1324198...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 493070, 1196452, 1111526, 1616, 1543445...
## $ hospitalizedCurrently       <dbl> 51, 816, 546, NA, 706, 3186, 356, 134, ...
## $ hospitalizedCumulative      <dbl> NA, 17989, 5805, NA, 20199, NA, 7834, 1...
## $ inIcuCurrently              <dbl> NA, NA, 237, NA, 155, 742, NA, NA, 27, ...
## $ inIcuCumulative             <dbl> NA, 1884, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 6, NA, 102, NA, 70, NA, NA, NA, 13, NA,...
## $ onVentilatorCumulative      <dbl> NA, 1063, 723, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 5734, 71240, 81563, NA, 36723, NA, 6756...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "10/9/2020 03:59", "10/9/2020 11:00", "...
## $ dateModified                <dttm> 2020-10-09 03:59:00, 2020-10-09 11:00:...
## $ checkTimeEt                 <chr> "10/08 23:59", "10/09 07:00", "10/07 20...
## $ death                       <dbl> 60, 2653, 1503, 0, 5746, 16428, 1997, 4...
## $ hospitalized                <dbl> NA, 17989, 5805, NA, 20199, NA, 7834, 1...
## $ dateChecked                 <dttm> 2020-10-09 03:59:00, 2020-10-09 11:00:...
## $ totalTestsViral             <dbl> 493070, 1196452, 1111526, 1616, NA, 157...
## $ positiveTestsViral          <dbl> 9256, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 483521, NA, 1025546, NA, NA, NA, NA, NA...
## $ positiveCasesViral          <dbl> 10176, 143900, 85980, 0, 219247, 838606...
## $ deathConfirmed              <dbl> 60, 2496, 1359, NA, 5460, NA, NA, 3631,...
## $ deathProbable               <dbl> NA, 157, 144, NA, 286, NA, NA, 899, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1496198, NA, 42...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1543445, NA, 984996, NA...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 300333, NA, 172162, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 12116, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 160046, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 59957, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 25538, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 4591, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 180, 1490, 0, 0, 683, 3806, 863, 290, 7...
## $ negativeIncrease            <dbl> 1719, 10506, 0, 0, 11699, 109068, 9322,...
## $ total                       <dbl> 493070, 1216017, 1115691, 1616, 1548282...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 1899, 11742, 0, 0, 12350, 112874, 18897...
## $ posNeg                      <dbl> 493070, 1216017, 1115691, 1616, 1548282...
## $ deathIncrease               <dbl> 0, 16, 0, 0, 3, 67, 7, 3, 0, 0, 118, 54...
## $ hospitalizedIncrease        <dbl> 0, 0, 0, 0, 28, 0, 63, 0, 0, 0, 196, 12...
## $ hash                        <chr> "3c160d82e90df263b41bbb98bcc4d2f6d17ff9...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        57271    800    NA    451888  1040
## 2 TRUE       7566265 204670    NA 112888209 11269
## Observations: 11,269
## Variables: 6
## $ date   <date> 2020-10-09, 2020-10-09, 2020-10-09, 2020-10-09, 2020-10-09,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 180, 1490, 0, 683, 3806, 863, 290, 78, 145, 2908, 1625, 101,...
## $ deaths <dbl> 0, 16, 0, 3, 67, 7, 3, 0, 0, 118, 54, 1, 16, 3, 32, 19, 40, ...
## $ hosp   <dbl> 51, 816, 546, 706, 3186, 356, 134, 99, 104, 2143, 1717, 111,...
## $ tests  <dbl> 1899, 11742, 0, 12350, 112874, 18897, 17281, 5409, 2865, 284...
## Observations: 11,269
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

The dendrogram reveals similarities in several of the main clusters - a clear early and high group (NJ, NY, CT, MA); a clear late and high group (GA, FL, SC, TX, AL, NV, AZ, MS); a clear early and medium group (MI, DE, PA, IN, IL, IN, MD); 1-2 groups that have had low death rates so far; and two groups (LA, DC, RI) and (VT, ME, CO, WA) that appear different than in previous analyses.

Cutting to seven segments should allow for further exploration of the two stand-alone groups of distinct states while leaving most other states in their previous segments. The full process is then run:

# Run the full process on the previously downloaded file, cutting the hierarchical clustering at seven segments
# The raw data from test_hier5_201001 is the comparison file
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201010.csv"
test_hier7_201010 <- readRunCOVIDTrackingProject(
    thruLabel="Oct 9, 2020", 
    readFrom=locDownload, 
    compareFile=test_hier5_201001$dfRaw,
    hierarchical=TRUE, 
    kCut=7, 
    minShape=3, 
    minDeath=100, 
    minCase=10000, 
    ratioDeathvsCase = 5, 
    ratioTotalvsShape = 0.5
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7623536        205470               8114969
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: probableCases
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-10-09 2020-10-08 2020-10-07 2020-10-06 2020-10-05 2020-10-04 2020-10-03 2020-10-02 2020-10-01
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
## [1] date     name     newValue oldValue
## <0 rows> (or 0-length row.names)
## Joining, by = c("date", "name")
## Warning: Removed 9 rows containing missing values (geom_path).
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state             name newValue oldValue
## 1    HI positiveIncrease    12469    12289
## Observations: 12,309
## Variables: 55
## $ date                        <date> 2020-10-09, 2020-10-09, 2020-10-09, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 10176, 163465, 90145, 0, 224084, 838606...
## $ probableCases               <dbl> NA, 19565, 4165, NA, 4837, NA, 5226, 24...
## $ negative                    <dbl> 482894, 1052552, 1025546, 1616, 1324198...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 493070, 1196452, 1111526, 1616, 1543445...
## $ hospitalizedCurrently       <dbl> 51, 816, 546, NA, 706, 3186, 356, 134, ...
## $ hospitalizedCumulative      <dbl> NA, 17989, 5805, NA, 20199, NA, 7834, 1...
## $ inIcuCurrently              <dbl> NA, NA, 237, NA, 155, 742, NA, NA, 27, ...
## $ inIcuCumulative             <dbl> NA, 1884, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 6, NA, 102, NA, 70, NA, NA, NA, 13, NA,...
## $ onVentilatorCumulative      <dbl> NA, 1063, 723, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 5734, 71240, 81563, NA, 36723, NA, 6756...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "10/9/2020 03:59", "10/9/2020 11:00", "...
## $ dateModified                <dttm> 2020-10-09 03:59:00, 2020-10-09 11:00:...
## $ checkTimeEt                 <chr> "10/08 23:59", "10/09 07:00", "10/07 20...
## $ death                       <dbl> 60, 2653, 1503, 0, 5746, 16428, 1997, 4...
## $ hospitalized                <dbl> NA, 17989, 5805, NA, 20199, NA, 7834, 1...
## $ dateChecked                 <dttm> 2020-10-09 03:59:00, 2020-10-09 11:00:...
## $ totalTestsViral             <dbl> 493070, 1196452, 1111526, 1616, NA, 157...
## $ positiveTestsViral          <dbl> 9256, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 483521, NA, 1025546, NA, NA, NA, NA, NA...
## $ positiveCasesViral          <dbl> 10176, 143900, 85980, 0, 219247, 838606...
## $ deathConfirmed              <dbl> 60, 2496, 1359, NA, 5460, NA, NA, 3631,...
## $ deathProbable               <dbl> NA, 157, 144, NA, 286, NA, NA, 899, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1496198, NA, 42...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1543445, NA, 984996, NA...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 300333, NA, 172162, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 12116, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 160046, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 59957, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 25538, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 4591, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 180, 1490, 0, 0, 683, 3806, 863, 290, 7...
## $ negativeIncrease            <dbl> 1719, 10506, 0, 0, 11699, 109068, 9322,...
## $ total                       <dbl> 493070, 1216017, 1115691, 1616, 1548282...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 1899, 11742, 0, 0, 12350, 112874, 18897...
## $ posNeg                      <dbl> 493070, 1216017, 1115691, 1616, 1548282...
## $ deathIncrease               <dbl> 0, 16, 0, 0, 3, 67, 7, 3, 0, 0, 118, 54...
## $ hospitalizedIncrease        <dbl> 0, 0, 0, 0, 28, 0, 63, 0, 0, 0, 196, 12...
## $ hash                        <chr> "3c160d82e90df263b41bbb98bcc4d2f6d17ff9...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        57271    800    NA    451888  1040
## 2 TRUE       7566265 204670    NA 112888209 11269
## Observations: 11,269
## Variables: 6
## $ date   <date> 2020-10-09, 2020-10-09, 2020-10-09, 2020-10-09, 2020-10-09,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 180, 1490, 0, 683, 3806, 863, 290, 78, 145, 2908, 1625, 101,...
## $ deaths <dbl> 0, 16, 0, 3, 67, 7, 3, 0, 0, 118, 54, 1, 16, 3, 32, 19, 40, ...
## $ hosp   <dbl> 51, 816, 546, 706, 3186, 356, 134, 99, 104, 2143, 1717, 111,...
## $ tests  <dbl> 1899, 11742, 0, 12350, 112874, 18897, 17281, 5409, 2865, 284...
## Observations: 11,269
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

## 
## Recency is defined as 2020-09-10 through current
## 
## Recency is defined as 2020-09-10 through current

The LA, DC, RI segment does not merit being on its own. It has a small population (6 million) and has broadly the same shape, with somewhat higher deaths, as the much larger MI, DE, PA, IN, IL, MD segment. The number of segments can be cut back to 6, since this is the first segment that would be consolidated by doing so.

The VT, ME, CO, WA segment also questionably merits stand-alone treatment. It is a variation of the segment that had mild disease early, but it is rather differentiated on the dendrogram and has a population (15 million) that is at least of the same order of magnitude as the other clusters.

Suppose that 6 segments are created, using the parameters “as is”:

# Input file for this run (file name indicates a 2020-10-10 download)
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201010.csv"

# Full process with hierarchical clustering cut to 6 segments; all other parameters left "as is"
# The raw data from the 2020-10-01 run is passed as the reference file for comparisons
test_hier6_201010 <- readRunCOVIDTrackingProject(
    thruLabel="Oct 9, 2020", 
    readFrom=locDownload, 
    compareFile=test_hier5_201001$dfRaw, 
    hierarchical=TRUE, 
    kCut=6, 
    minShape=3, 
    ratioDeathvsCase=5, 
    ratioTotalvsShape=0.5, 
    minDeath=100, 
    minCase=10000
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7623536        205470               8114969
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: probableCases
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-10-09 2020-10-08 2020-10-07 2020-10-06 2020-10-05 2020-10-04 2020-10-03 2020-10-02 2020-10-01
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
## [1] date     name     newValue oldValue
## <0 rows> (or 0-length row.names)
## Joining, by = c("date", "name")
## Warning: Removed 9 rows containing missing values (geom_path).
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state             name newValue oldValue
## 1    HI positiveIncrease    12469    12289
## Observations: 12,309
## Variables: 55
## $ date                        <date> 2020-10-09, 2020-10-09, 2020-10-09, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 10176, 163465, 90145, 0, 224084, 838606...
## $ probableCases               <dbl> NA, 19565, 4165, NA, 4837, NA, 5226, 24...
## $ negative                    <dbl> 482894, 1052552, 1025546, 1616, 1324198...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 493070, 1196452, 1111526, 1616, 1543445...
## $ hospitalizedCurrently       <dbl> 51, 816, 546, NA, 706, 3186, 356, 134, ...
## $ hospitalizedCumulative      <dbl> NA, 17989, 5805, NA, 20199, NA, 7834, 1...
## $ inIcuCurrently              <dbl> NA, NA, 237, NA, 155, 742, NA, NA, 27, ...
## $ inIcuCumulative             <dbl> NA, 1884, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 6, NA, 102, NA, 70, NA, NA, NA, 13, NA,...
## $ onVentilatorCumulative      <dbl> NA, 1063, 723, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 5734, 71240, 81563, NA, 36723, NA, 6756...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "10/9/2020 03:59", "10/9/2020 11:00", "...
## $ dateModified                <dttm> 2020-10-09 03:59:00, 2020-10-09 11:00:...
## $ checkTimeEt                 <chr> "10/08 23:59", "10/09 07:00", "10/07 20...
## $ death                       <dbl> 60, 2653, 1503, 0, 5746, 16428, 1997, 4...
## $ hospitalized                <dbl> NA, 17989, 5805, NA, 20199, NA, 7834, 1...
## $ dateChecked                 <dttm> 2020-10-09 03:59:00, 2020-10-09 11:00:...
## $ totalTestsViral             <dbl> 493070, 1196452, 1111526, 1616, NA, 157...
## $ positiveTestsViral          <dbl> 9256, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ negativeTestsViral          <dbl> 483521, NA, 1025546, NA, NA, NA, NA, NA...
## $ positiveCasesViral          <dbl> 10176, 143900, 85980, 0, 219247, 838606...
## $ deathConfirmed              <dbl> 60, 2496, 1359, NA, 5460, NA, NA, 3631,...
## $ deathProbable               <dbl> NA, 157, 144, NA, 286, NA, NA, 899, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1496198, NA, 42...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1543445, NA, 984996, NA...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 300333, NA, 172162, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 12116, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 160046, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 59957, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 25538, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 4591, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 180, 1490, 0, 0, 683, 3806, 863, 290, 7...
## $ negativeIncrease            <dbl> 1719, 10506, 0, 0, 11699, 109068, 9322,...
## $ total                       <dbl> 493070, 1216017, 1115691, 1616, 1548282...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 1899, 11742, 0, 0, 12350, 112874, 18897...
## $ posNeg                      <dbl> 493070, 1216017, 1115691, 1616, 1548282...
## $ deathIncrease               <dbl> 0, 16, 0, 0, 3, 67, 7, 3, 0, 0, 118, 54...
## $ hospitalizedIncrease        <dbl> 0, 0, 0, 0, 28, 0, 63, 0, 0, 0, 196, 12...
## $ hash                        <chr> "3c160d82e90df263b41bbb98bcc4d2f6d17ff9...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        57271    800    NA    451888  1040
## 2 TRUE       7566265 204670    NA 112888209 11269
## Observations: 11,269
## Variables: 6
## $ date   <date> 2020-10-09, 2020-10-09, 2020-10-09, 2020-10-09, 2020-10-09,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 180, 1490, 0, 683, 3806, 863, 290, 78, 145, 2908, 1625, 101,...
## $ deaths <dbl> 0, 16, 0, 3, 67, 7, 3, 0, 0, 118, 54, 1, 16, 3, 32, 19, 40, ...
## $ hosp   <dbl> 51, 816, 546, 706, 3186, 356, 134, 99, 104, 2143, 1717, 111,...
## $ tests  <dbl> 1899, 11742, 0, 12350, 112874, 18897, 17281, 5409, 2865, 284...
## Observations: 11,269
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

## 
## Recency is defined as 2020-09-10 through current
## 
## Recency is defined as 2020-09-10 through current

The segments appear to follow reasonable patterns:

  • High impact, skewed early
  • Medium impact, skewed early
  • Medium impact, skewed late
  • Small impact, skewed early (two segments are broadly in this category)
  • Small impact, sustained

Comparisons of segment membership can also be made:

# Confirm that the names (states) of the old and new cluster vectors match, in the same order
# all.equal() returns a character description (not FALSE) when its inputs differ, so it must be 
# wrapped in isTRUE(); a bare !all.equal(...) fails with "invalid argument type" on a mismatch
if (!isTRUE(all.equal(names(test_hier5$useClusters), names(test_hier6_201010$useClusters)))) {
    stop("\nIssue with cluster names not matching\n")
}

# Create a data frame of segment changes (one row per state, old and new cluster side by side)
# The element-wise pairing is valid only because the names were confirmed to match above
segChanges <- tibble::tibble(state=names(test_hier5$useClusters), 
                             oldCluster=test_hier5$useClusters, 
                             newCluster=test_hier6_201010$useClusters
                             )

# Counts of states by oldCluster and newCluster
segChanges %>%
    count(oldCluster, newCluster)
## # A tibble: 7 x 3
##   oldCluster newCluster     n
##        <int>      <int> <int>
## 1          1          1    13
## 2          2          2     8
## 3          3          1     5
## 4          3          3     4
## 5          3          6     8
## 6          4          4     4
## 7          5          5     9
# Show only the states whose new cluster differs from their old cluster
dplyr::filter(segChanges, oldCluster != newCluster)
## # A tibble: 13 x 3
##    state oldCluster newCluster
##    <chr>      <int>      <int>
##  1 IA             3          6
##  2 KS             3          1
##  3 KY             3          1
##  4 MN             3          6
##  5 MO             3          1
##  6 ND             3          1
##  7 NE             3          6
##  8 NH             3          6
##  9 NM             3          6
## 10 OH             3          6
## 11 SD             3          1
## 12 VA             3          6
## 13 WI             3          6

What was previously one of the two low-impact segments has been split into:

  • CO, ME, VT, WA (new segment from split); and
  • IA, MN, NE, NH, NM, OH, VA, WI (new segment from split); and
  • KS, KY, MO, ND, SD (consolidates to other low-impact segment that has rising cases with stable deaths)

At a glance it seems reasonable, though the “lower impact” segments could theoretically all be consolidated without losing too much differentiation. That said, the dendrogram built with reasonable input parameters considers the split of the lower-impact segments to be more meaningful than the split of the moderate-early and moderate-late segments. And, the moderate-early and moderate-late split is valuable for analysis, so the decision is made to remain with the 6 hierarchical clusters as of the October 9, 2020 data.

Next, the USA Facts process is updated to be run using a single function, including the capability to download new data (function moved to Coronavirus_Statistics_Functions_v002.R):

# Re-run previously processed inputs (case and death files downloaded 20201003) through the 
# single function so that its outputs can be checked against the existing objects
tmpCty <- readRunUSAFacts(
    maxDate="2020-09-30", 
    popLoc="./RInputFiles/Coronavirus/covid_county_population_usafacts.csv", 
    caseLoc="./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201003.csv", 
    deathLoc="./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201003.csv", 
    oldFile=burden_20200903_new, 
    existingCountyClusters=clustVec_county_20200903_new
)
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 811,530
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 811,530
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0214              0          0.179          0.0761
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Recency is defined as 2020-09-01 through current
## 
## Recency is defined as 2020-09-01 through current
## Warning: Removed 1 rows containing missing values (geom_point).

## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"

# Run subsets of the data
# First call: subset driven by a vector of key states
runUSAFactsSubsets(
    tmpCty, 
    keyStates=c("CA", "TX", "FL", "NY", "IL", "PA")
)

# Second call: lag analysis driven by a table of metrics (population and deaths per million)
# NOTE(review): n looks like the number of key counties taken per cluster for each metric - confirm
runUSAFactsSubsets(
    tmpCty, 
    lagMetrics=tibble::tibble(
        metric=c("pop", "dpm"), 
        n=c(3, 5), 
        minDate="2020-03-01", 
        maxDate="2020-09-30"
    )
)
## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 15 x 5
##    state cluster countyName             dpm      pop
##    <chr> <fct>   <chr>                <dbl>    <dbl>
##  1 06037 5       5 - Los Angeles (CA)  655. 10039107
##  2 17031 4       4 - Cook (IL)        1015.  5150233
##  3 48201 1       1 - Harris (TX)       551.  4713325
##  4 04013 4       4 - Maricopa (AZ)     756.  4485414
##  5 06073 2       2 - San Diego (CA)    235.  3338330
##  6 06059 1       1 - Orange (CA)       399.  3175692
##  7 12086 4       4 - Miami-Dade (FL)  1207.  2716940
##  8 48113 1       1 - Dallas (TX)       430.  2635516
##  9 36047 3       3 - Kings (NY)       2863.  2559903
## 10 36081 3       3 - Queens (NY)      3218.  2253858
## 11 53033 5       5 - King (WA)         337.  2252782
## 12 06085 2       2 - Santa Clara (CA)  167.  1927852
## 13 26163 3       3 - Wayne (MI)       1706.  1749343
## 14 06001 2       2 - Alameda (CA)      247.  1671329
## 15 39049 5       5 - Franklin (OH)     467.  1316756

## 
## The best lags are:
## # A tibble: 15 x 5
##    countyName           state cluster  corr   lag
##    <chr>                <chr> <fct>   <dbl> <int>
##  1 5 - Franklin (OH)    39049 5       0.218     0
##  2 5 - Los Angeles (CA) 06037 5       0.616     0
##  3 2 - San Diego (CA)   06073 2       0.646     2
##  4 5 - King (WA)        53033 5       0.198     2
##  5 4 - Cook (IL)        17031 4       0.836     5
##  6 3 - Kings (NY)       36047 3       0.965     8
##  7 3 - Queens (NY)      36081 3       0.941     8
##  8 1 - Dallas (TX)      48113 1       0.782     9
##  9 3 - Wayne (MI)       26163 3       0.844    10
## 10 1 - Harris (TX)      48201 1       0.740    13
## 11 4 - Maricopa (AZ)    04013 4       0.965    18
## 12 1 - Orange (CA)      06059 1       0.757    24
## 13 2 - Alameda (CA)     06001 2       0.727    24
## 14 2 - Santa Clara (CA) 06085 2       0.487    30
## 15 4 - Miami-Dade (FL)  12086 4       0.785    30
## Warning: Removed 26 rows containing missing values (geom_path).
## Warning: Removed 26 rows containing missing values (geom_path).

## 
## *** KEY COUNTIES INCLUDE: ***
## # A tibble: 25 x 5
##    state cluster countyName            dpm     pop
##    <chr> <fct>   <chr>               <dbl>   <dbl>
##  1 36005 3       3 - Bronx (NY)      3492. 1418207
##  2 36081 3       3 - Queens (NY)     3218. 2253858
##  3 36047 3       3 - Kings (NY)      2863. 2559903
##  4 34013 3       3 - Essex (NJ)      2663.  798975
##  5 34031 3       3 - Passaic (NJ)    2499.  501826
##  6 48215 4       4 - Hidalgo (TX)    1755.  868707
##  7 04027 4       4 - Yuma (AZ)       1614.  213787
##  8 22017 4       4 - Caddo (LA)      1503.  240204
##  9 44007 4       4 - Providence (RI) 1398.  638931
## 10 45041 4       4 - Florence (SC)   1381.  138293
## # ... with 15 more rows

## 
## The best lags are:
## # A tibble: 25 x 5
##    countyName          state cluster    corr   lag
##    <chr>               <chr> <fct>     <dbl> <int>
##  1 2 - Pulaski (AR)    05119 2        0.558      0
##  2 5 - Columbiana (OH) 39029 5       -0.0149     0
##  3 5 - Litchfield (CT) 09005 5        0.922      4
##  4 4 - Caddo (LA)      22017 4        0.210      5
##  5 3 - Bronx (NY)      36005 3        0.975      6
##  6 3 - Essex (NJ)      34013 3        0.876      7
##  7 3 - Kings (NY)      36047 3        0.965      8
##  8 3 - Queens (NY)     36081 3        0.941      8
##  9 4 - Florence (SC)   45041 4        0.856      8
## 10 3 - Passaic (NJ)    34031 3        0.810      9
## # ... with 15 more rows
## Warning: Removed 158 rows containing missing values (geom_path).
## Warning: Removed 158 rows containing missing values (geom_path).

# Confirm that outputs are identical
# Each element of the list returned by readRunUSAFacts() is compared, using exact equality, with
# an object that is not defined in this section (presumably built by the earlier step-by-step process)
identical(tmpCty$pop, pop_usafacts)
## [1] TRUE
identical(tmpCty$burdenData, burden_20201003)
## [1] TRUE
identical(tmpCty$clusterData, clust_20201003)
## [1] TRUE
# clustVec is compared with the same vector that was passed in as existingCountyClusters
identical(tmpCty$clustVec, clustVec_county_20200903_new)
## [1] TRUE
identical(tmpCty$helperACC_county, helperACC_county_20201003)
## [1] TRUE
identical(tmpCty$clusterStateData, clusterStateData_20201003)
## [1] TRUE
## [1] TRUE

The function as written can re-create previous results using existing data. Next steps are to enable data downloads and to create new segments using more recent data.

Data as of October 12, 2020 can be downloaded and processed against the existing segments:

# File locations for the October 12, 2020 case and death data
caseLoc_20201012 <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201012.csv"
deathLoc_20201012 <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201012.csv"

# Process data through 2020-10-10 against the existing county clusters
# NOTE(review): dlCaseDeath=TRUE presumably downloads fresh case/death files to the locations above - confirm
cty_20201012 <- readRunUSAFacts(
    maxDate="2020-10-10", 
    popLoc="./RInputFiles/Coronavirus/covid_county_population_usafacts.csv", 
    caseLoc=caseLoc_20201012, 
    deathLoc=deathLoc_20201012, 
    dlCaseDeath=TRUE, 
    oldFile=burden_20200903_new, 
    existingCountyClusters=clustVec_county_20200903_new
)
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 840,285
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 840,285
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0189              0          0.154          0.0616
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Recency is defined as 2020-09-11 through current
## 
## Recency is defined as 2020-09-11 through current

## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"

Finally, the CDC data can be run using a single function:

# Map each 2020 epiweek to a single month and quarter
# An epiweek that straddles two months is assigned to the month holding most of its days
epiMonth <- tibble::tibble(dt=seq(as.Date("2020-01-01"), as.Date("2020-12-31"), by="day")) %>%
    mutate(month=lubridate::month(dt), 
           quarter=lubridate::quarter(dt), 
           ew=lubridate::epiweek(dt)
           ) %>%
    count(ew, month, quarter) %>%
    # Within each epiweek, put the month/quarter with the most days first
    arrange(ew, desc(n)) %>%
    group_by(ew) %>%
    summarize(month=factor(month.abb[first(month)], levels=month.abb), quarter=first(quarter))
epiMonth
## # A tibble: 53 x 3
##       ew month quarter
##    <dbl> <fct>   <int>
##  1     1 Jan         1
##  2     2 Jan         1
##  3     3 Jan         1
##  4     4 Jan         1
##  5     5 Jan         1
##  6     6 Feb         1
##  7     7 Feb         1
##  8     8 Feb         1
##  9     9 Feb         1
## 10    10 Mar         1
## # ... with 43 more rows
# Estimated 2020 population rolled up to the age buckets used in the CDC data
# survival::uspop2 is sliced to 2020 and summed across its remaining second dimension for each age row
usPopBucket2020 <- survival::uspop2[, , "2020"] %>%
    apply(MARGIN=1, FUN=sum) %>%
    tibble::tibble(ageActual=as.integer(names(.)), pop_2020=.) %>%
    # Step 1: label each single-year age with its bucket; Step 2: order buckets like the CDC age factor
    mutate(age=case_when(ageActual <= 24 ~ "Under 25 years", 
                         ageActual <= 44 ~ "25-44 years", 
                         ageActual <= 64 ~ "45-64 years", 
                         ageActual <= 74 ~ "65-74 years", 
                         ageActual <= 84 ~ "75-84 years", 
                         TRUE ~ "85 years and older"
                         ), 
           age=factor(age, levels=levels(cdc20200923$age))
           ) %>%
    group_by(age) %>%
    summarize(pop=sum(pop_2020))
usPopBucket2020
## # A tibble: 6 x 2
##   age                      pop
##   <fct>                  <int>
## 1 Under 25 years     112501697
## 2 25-44 years         89724301
## 3 45-64 years         84356197
## 4 65-74 years         32312186
## 5 75-84 years         15895265
## 6 85 years and older   6597019
# Load and process the CDC all-cause deaths data (file name indicates a download dated 2020-09-23)
# readRunCDCAllCause() is defined in the sourced functions file, so the notes below are read off the
# argument names and should be confirmed against that definition:
#   - startYear/curYear and startWeek/weekThru presumably bound the years and epiweeks that are analyzed
#   - lst presumably supplies the state segments from an earlier COVID Tracking Project run (test_hier5)
#   - epiMap and agePopData receive the epiMonth and usPopBucket2020 objects created earlier in this file
# NOTE(review): epiweek 30 of 2020 ends on 2020-07-25, while cvDeathThru is 2020-07-31 - confirm intended
cdcLoc <- "Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20200923.csv"
cdcList_20200923 <- readRunCDCAllCause(loc=cdcLoc, 
                                       dir="./RInputFiles/Coronavirus/",
                                       startYear=2015, 
                                       curYear=2020,
                                       weekThru=30, 
                                       startWeek=9, 
                                       lst=test_hier5, 
                                       epiMap=epiMonth, 
                                       cvDeathThru="2020-07-31", 
                                       cdcPlotStartWeek=10, 
                                       agePopData=usPopBucket2020
                                       )
## Observations: 174,311
## Variables: 11
## $ Jurisdiction         <chr> "Alabama", "Alabama", "Alabama", "Alabama", "A...
## $ `Week Ending Date`   <chr> "1/10/2015", "1/17/2015", "1/24/2015", "1/31/2...
## $ `State Abbreviation` <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"...
## $ Year                 <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ Week                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ `Age Group`          <chr> "25-44 years", "25-44 years", "25-44 years", "...
## $ `Number of Deaths`   <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50...
## $ `Time Period`        <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2...
## $ Type                 <chr> "Predicted (weighted)", "Predicted (weighted)"...
## $ Suppress             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 174,311
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <chr> "25-44 years", "25-44 years", "25-44 years", "25-44 ye...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2019", "2...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Check Control Levels and Record Counts for Renamed Data:
## # A tibble: 6 x 4
##   age                    n n_deaths_na   deaths
##   <chr>              <int>       <int>    <dbl>
## 1 25-44 years        26066           3  3170502
## 2 45-64 years        31884           7 12464677
## 3 65-74 years        31880          10 12347549
## 4 75-84 years        31902          11 15363924
## 5 85 years and older 31888           9 20067928
## 6 Under 25 years     20691           0  1372225
## # A tibble: 12 x 6
## # Groups:   period, year [6]
##    period     year type                     n n_deaths_na  deaths
##    <chr>     <int> <chr>                <int>       <int>   <dbl>
##  1 2015-2019  2015 Predicted (weighted) 15285           0 5416393
##  2 2015-2019  2015 Unweighted           15285           0 5416393
##  3 2015-2019  2016 Predicted (weighted) 15365           0 5483764
##  4 2015-2019  2016 Unweighted           15365           0 5483764
##  5 2015-2019  2017 Predicted (weighted) 15318           0 5643350
##  6 2015-2019  2017 Unweighted           15318           0 5643350
##  7 2015-2019  2018 Predicted (weighted) 15305           0 5698002
##  8 2015-2019  2018 Unweighted           15305           0 5698002
##  9 2015-2019  2019 Predicted (weighted) 15319           0 5725516
## 10 2015-2019  2019 Unweighted           15319           0 5725516
## 11 2020       2020 Predicted (weighted) 10586          24 4476340
## 12 2020       2020 Unweighted           10541          16 4376415
## # A tibble: 3 x 5
## # Groups:   period [2]
##   period    Suppress                                       n n_deaths_na  deaths
##   <chr>     <chr>                                      <int>       <int>   <dbl>
## 1 2015-2019 <NA>                                      153184           0  5.59e7
## 2 2020      Suppressed (counts highly incomplete, <5~     40          40  0.    
## 3 2020      <NA>                                       21087           0  8.85e6
## # A tibble: 9 x 5
## # Groups:   period [2]
##   period   Note                                            n n_deaths_na  deaths
##   <chr>    <chr>                                       <int>       <int>   <dbl>
## 1 2015-20~ <NA>                                       153184           0  5.59e7
## 2 2020     Data in recent weeks are incomplete. Only~  16591           0  7.27e6
## 3 2020     Data in recent weeks are incomplete. Only~    324           0  1.52e5
## 4 2020     Data in recent weeks are incomplete. Only~    288          30  3.05e4
## 5 2020     Data in recent weeks are incomplete. Only~   1502          10  4.17e5
## 6 2020     Data in recent weeks are incomplete. Only~     60           0  2.71e4
## 7 2020     Estimates for Pennsylvania are too low fo~     48           0  2.23e4
## 8 2020     Weights may be too low to account for und~    436           0  1.40e5
## 9 2020     <NA>                                         1878           0  7.97e5
##    state         Jurisdiction    n n_deaths_na   deaths
## 1     US        United States 3552           0 32276762
## 2     CA           California 3552           0  3072016
## 3     FL              Florida 3552           0  2357528
## 4     TX                Texas 3552           0  2314502
## 5     PA         Pennsylvania 3552           0  1548716
## 6     OH                 Ohio 3552           0  1395014
## 7     IL             Illinois 3552           0  1219119
## 8     NY             New York 3552           0  1155790
## 9     MI             Michigan 3552           0  1111203
## 10    NC       North Carolina 3521          17  1051632
## 11    GA              Georgia 3551           0   965504
## 12    NJ           New Jersey 3546           0   867210
## 13    TN            Tennessee 3552           0   840787
## 14    VA             Virginia 3552           0   774383
## 15    IN              Indiana 3550           0   749760
## 16    MO             Missouri 3548           0   728220
## 17    MA        Massachusetts 3516           0   685409
## 18    AZ              Arizona 3552           0   684537
## 19    YC        New York City 3548           0   671106
## 20    WA           Washington 3551           0   645406
## 21    AL              Alabama 3550           0   598526
## 22    WI            Wisconsin 3533           0   592047
## 23    MD             Maryland 3546           0   570238
## 24    SC       South Carolina 3549           0   560415
## 25    KY             Kentucky 3519           0   545032
## 26    LA            Louisiana 3545           0   525668
## 27    MN            Minnesota 3509           0   503567
## 28    CO             Colorado 3550           0   446708
## 29    OK             Oklahoma 3541           0   445362
## 30    OR               Oregon 3382           0   413553
## 31    MS          Mississippi 3488           0   363792
## 32    AR             Arkansas 3444           0   361612
## 33    CT          Connecticut 3106          13   356416
## 34    IA                 Iowa 3190           0   339791
## 35    PR          Puerto Rico 3272           0   331654
## 36    KS               Kansas 3246           0   296520
## 37    NV               Nevada 3291           0   289275
## 38    WV        West Virginia 3011          10   251046
## 39    UT                 Utah 3438           0   213931
## 40    NM           New Mexico 3140           0   205026
## 41    NE             Nebraska 2846           0   188492
## 42    ME                Maine 2646           0   160341
## 43    ID                Idaho 2766           0   152936
## 44    NH        New Hampshire 2672           0   135110
## 45    HI               Hawaii 2556           0   124379
## 46    RI         Rhode Island 2474           0   114274
## 47    MT              Montana 2556           0   109719
## 48    DE             Delaware 2558           0    99625
## 49    SD         South Dakota 2448           0    86461
## 50    ND         North Dakota 2433           0    75079
## 51    DC District of Columbia 2545           0    64202
## 52    VT              Vermont 2336           0    61925
## 53    WY              Wyoming 2318           0    47230
## 54    AK               Alaska 2352           0    42249

## Observations: 174,311
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 85,466
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Confirm that data suppressions and NA deaths have been aliminated:
## # A tibble: 0 x 11
## # ... with 11 variables: Jurisdiction <chr>, weekEnding <date>, state <chr>,
## #   year <fct>, week <int>, age <fct>, deaths <dbl>, period <fct>, type <chr>,
## #   Suppress <chr>, Note <chr>
## Observations: 80,379
## Variables: 12
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-10, 2015-01-10, 2015-01-10, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ...
## $ age          <fct> Under 25 years, 25-44 years, 45-64 years, 65-74 years,...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ n            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ deaths       <dbl> 25, 67, 253, 202, 272, 320, 28, 49, 256, 222, 253, 332...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## First duplicate is in row number (0 means no duplicates): 0

## Warning: Column `state` joining factor and character vector, coercing into
## character vector

## Joining, by = "state"

## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).

# Download latest CDC data and process (file name indicates a download dated 2020-10-14)
# Same readRunCDCAllCause() call pattern as the 20200923 run, with these differences:
#   - weekThru=34 and cvDeathThru="2020-08-22" (2020-08-22 is the last day of epiweek 34 of 2020)
#   - lst uses the newer test_hier5_201001 object
#   - dlData=TRUE requests a download; in the rendered run the file already existed and was not downloaded
#     again, after which processing continued with ovrWriteError=FALSE
# NOTE(review): exact semantics of dlData and ovrWriteError live in the sourced functions file - confirm
cdcLoc <- "Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20201014.csv"
cdcList_20201014 <- readRunCDCAllCause(loc=cdcLoc, 
                                       dir="./RInputFiles/Coronavirus/",
                                       startYear=2015, 
                                       curYear=2020,
                                       weekThru=34, 
                                       startWeek=9, 
                                       lst=test_hier5_201001, 
                                       epiMap=epiMonth, 
                                       cvDeathThru="2020-08-22", 
                                       cdcPlotStartWeek=10, 
                                       agePopData=usPopBucket2020, 
                                       dlData=TRUE, 
                                       ovrWriteError=FALSE
                                       )
## 
## File: ./RInputFiles/Coronavirus/Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20201014.csv already exists
## 
## File is NOT downloaded again
## Observations: 176,119
## Variables: 11
## $ Jurisdiction         <chr> "Alabama", "Alabama", "Alabama", "Alabama", "A...
## $ `Week Ending Date`   <chr> "01/10/2015", "01/17/2015", "01/24/2015", "01/...
## $ `State Abbreviation` <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"...
## $ Year                 <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ Week                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ `Age Group`          <chr> "25-44 years", "25-44 years", "25-44 years", "...
## $ `Number of Deaths`   <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50...
## $ `Time Period`        <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2...
## $ Type                 <chr> "Predicted (weighted)", "Predicted (weighted)"...
## $ Suppress             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 176,119
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <chr> "25-44 years", "25-44 years", "25-44 years", "25-44 ye...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2019", "2...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Check Control Levels and Record Counts for Renamed Data:
## # A tibble: 6 x 4
##   age                    n n_deaths_na   deaths
##   <chr>              <int>       <int>    <dbl>
## 1 25-44 years        26364           0  3214460
## 2 45-64 years        32211           4 12612924
## 3 65-74 years        32200           5 12500875
## 4 75-84 years        32221           6 15546198
## 5 85 years and older 32210           7 20287067
## 6 Under 25 years     20913           0  1388384
## # A tibble: 12 x 6
## # Groups:   period, year [6]
##    period     year type                     n n_deaths_na  deaths
##    <chr>     <int> <chr>                <int>       <int>   <dbl>
##  1 2015-2019  2015 Predicted (weighted) 15285           0 5416393
##  2 2015-2019  2015 Unweighted           15285           0 5416393
##  3 2015-2019  2016 Predicted (weighted) 15365           0 5483764
##  4 2015-2019  2016 Unweighted           15365           0 5483764
##  5 2015-2019  2017 Predicted (weighted) 15319           0 5643363
##  6 2015-2019  2017 Unweighted           15319           0 5643363
##  7 2015-2019  2018 Predicted (weighted) 15305           0 5698004
##  8 2015-2019  2018 Unweighted           15305           0 5698004
##  9 2015-2019  2019 Predicted (weighted) 15317           0 5725524
## 10 2015-2019  2019 Unweighted           15317           0 5725524
## 11 2020       2020 Predicted (weighted) 11490          13 4858272
## 12 2020       2020 Unweighted           11447           9 4757540
## # A tibble: 3 x 5
## # Groups:   period [2]
##   period    Suppress                                       n n_deaths_na  deaths
##   <chr>     <chr>                                      <int>       <int>   <dbl>
## 1 2015-2019 <NA>                                      153182           0  5.59e7
## 2 2020      Suppressed (counts highly incomplete, <5~     22          22  0.    
## 3 2020      <NA>                                       22915           0  9.62e6
## # A tibble: 10 x 5
## # Groups:   period [2]
##    period   Note                                           n n_deaths_na  deaths
##    <chr>    <chr>                                      <int>       <int>   <dbl>
##  1 2015-20~ <NA>                                      153182           0  5.59e7
##  2 2020     Data in recent weeks are incomplete. Onl~  18444           0  8.13e6
##  3 2020     Data in recent weeks are incomplete. Onl~    360           0  1.68e5
##  4 2020     Data in recent weeks are incomplete. Onl~    382          19  4.45e4
##  5 2020     Data in recent weeks are incomplete. Onl~   1329           3  2.88e5
##  6 2020     Data in recent weeks are incomplete. Onl~     60           0  2.79e4
##  7 2020     Estimates for Pennsylvania are too low f~     36           0  1.68e4
##  8 2020     Weights may be too low to account for un~    284           0  7.19e4
##  9 2020     Weights may be too low to account for un~     12           0  5.72e3
## 10 2020     <NA>                                        2030           0  8.65e5
##    state         Jurisdiction    n n_deaths_na   deaths
## 1     US        United States 3588           0 32655734
## 2     CA           California 3588           0  3108728
## 3     FL              Florida 3588           0  2387134
## 4     TX                Texas 3588           0  2348924
## 5     PA         Pennsylvania 3588           0  1566142
## 6     OH                 Ohio 3588           0  1411315
## 7     IL             Illinois 3588           0  1232847
## 8     NY             New York 3588           0  1167718
## 9     MI             Michigan 3588           0  1123550
## 10    NC       North Carolina 3536           8  1059363
## 11    GA              Georgia 3588           0   978193
## 12    NJ           New Jersey 3580           0   875776
## 13    TN            Tennessee 3588           0   851531
## 14    VA             Virginia 3588           0   782913
## 15    IN              Indiana 3587           0   758488
## 16    MO             Missouri 3586           0   737546
## 17    AZ              Arizona 3588           0   692460
## 18    MA        Massachusetts 3554           0   692109
## 19    YC        New York City 3584           0   676973
## 20    WA           Washington 3588           0   652928
## 21    AL              Alabama 3585           0   606105
## 22    WI            Wisconsin 3570           0   598878
## 23    MD             Maryland 3582           0   576822
## 24    SC       South Carolina 3586           0   568254
## 25    KY             Kentucky 3553           0   552088
## 26    LA            Louisiana 3577           0   532758
## 27    MN            Minnesota 3547           0   509530
## 28    CO             Colorado 3586           0   451948
## 29    OK             Oklahoma 3576           0   450316
## 30    OR               Oregon 3418           0   418422
## 31    MS          Mississippi 3522           0   368372
## 32    AR             Arkansas 3484           0   366066
## 33    CT          Connecticut 3146           9   360662
## 34    IA                 Iowa 3224           0   343678
## 35    PR          Puerto Rico 3305           0   336026
## 36    KS               Kansas 3282           0   300158
## 37    NV               Nevada 3327           0   292844
## 38    WV        West Virginia 3033           3   253132
## 39    UT                 Utah 3474           0   216546
## 40    NM           New Mexico 3170           0   207657
## 41    NE             Nebraska 2878           0   190649
## 42    ME                Maine 2676           0   162212
## 43    ID                Idaho 2796           0   154882
## 44    NH        New Hampshire 2700           0   136571
## 45    HI               Hawaii 2588           0   125932
## 46    RI         Rhode Island 2504           0   115701
## 47    MT              Montana 2584           0   111052
## 48    DE             Delaware 2590           0   100672
## 49    SD         South Dakota 2478           0    87630
## 50    ND         North Dakota 2459           0    75823
## 51    DC District of Columbia 2571           0    64792
## 52    VT              Vermont 2362           0    62663
## 53    WY              Wyoming 2348           0    47960
## 54    AK               Alaska 2379           2    42735

## Observations: 176,119
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 86,663
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Confirm that data suppressions and NA deaths have been aliminated:
## # A tibble: 0 x 11
## # ... with 11 variables: Jurisdiction <chr>, weekEnding <date>, state <chr>,
## #   year <fct>, week <int>, age <fct>, deaths <dbl>, period <fct>, type <chr>,
## #   Suppress <chr>, Note <chr>
## Observations: 81,507
## Variables: 12
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-10, 2015-01-10, 2015-01-10, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ...
## $ age          <fct> Under 25 years, 25-44 years, 45-64 years, 65-74 years,...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ n            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ deaths       <dbl> 25, 67, 253, 202, 272, 320, 28, 49, 256, 222, 253, 332...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## First duplicate is in row number (0 means no duplicates): 0

## Warning: Column `state` joining factor and character vector, coercing into
## character vector

## Joining, by = "state"

## Warning: Removed 1 rows containing missing values (position_stack).

## Warning: Removed 1 rows containing missing values (geom_text).

Next, a capability for saving and reading .RDS files is included, and key files from previous analyses are saved:

# Persist key objects from previous analyses as .RDS files so that later runs can reload them
# saveToRDS() is defined in the sourced functions file and is called here without an explicit file name
# NOTE(review): presumably the file name is derived from the name of the object passed in, so each object
# is passed by name in its own call; ovrWriteError=FALSE presumably avoids an error when the file already
# exists - confirm both against the function definition

# File used for comparison in readRunCOVIDTrackingProject
saveToRDS(test_hier5_201001, ovrWriteError=FALSE)

# Files used for comparison in readRunUSAFacts
saveToRDS(burden_20200903_new, ovrWriteError=FALSE)
saveToRDS(clustVec_county_20200903_new, ovrWriteError=FALSE)

# Files used for CDC all-cause deaths (passed as epiMap and agePopData to readRunCDCAllCause)
saveToRDS(epiMonth, ovrWriteError=FALSE)
saveToRDS(usPopBucket2020, ovrWriteError=FALSE)

The full process of downloading new data and creating new segments is then:

  1. Download new data from COVID Tracking Project and create 6 clusters (can update parameters if the dendrogram and plots do not look acceptable)
  2. Download new data from USA Facts and create 5 clusters (can update parameters if the charts do not look acceptable)
  3. Download new data from CDC and run analyses against it

Example code includes:

# Create segments and download data from COVID Tracking Project
# downloadTo is NULL when locDownload already exists on disk (presumably skipping the download) and is
# the file path otherwise; the data are read from locDownload in either case
# compareFile is the dfRaw element of the previously saved test_hier5_201001 object (loaded from .RDS)
# hierarchical=TRUE with kCut=6 corresponds to the 6 clusters described in the process steps
# NOTE(review): minShape, ratioDeathvsCase, ratioTotalvsShape, minDeath, and minCase are tuning values
# whose meaning is defined in readRunCOVIDTrackingProject() in the sourced functions file - confirm there
# NOTE(review): the file name is dated 201015 while the label and object name use Oct 14 - confirm intended
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201015.csv"
test_hier6_201014 <- readRunCOVIDTrackingProject(thruLabel="Oct 14, 2020", 
                                                 downloadTo=if(file.exists(locDownload)) NULL else locDownload,
                                                 readFrom=locDownload, 
                                                 compareFile=readFromRDS("test_hier5_201001")$dfRaw,
                                                 hierarchical=TRUE, 
                                                 kCut=6, 
                                                 minShape=3, 
                                                 ratioDeathvsCase = 5, 
                                                 ratioTotalvsShape = 0.5, 
                                                 minDeath=100, 
                                                 minCase=10000
                                                 )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7876282        208389               8292248
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: probableCases
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-10-14 2020-10-13 2020-10-12 2020-10-11 2020-10-10 2020-10-09 2020-10-08 2020-10-07 2020-10-06 2020-10-05 2020-10-04 2020-10-03 2020-10-02 2020-10-01
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
## [1] date     name     newValue oldValue
## <0 rows> (or 0-length row.names)
## Joining, by = c("date", "name")
## Warning: Removed 14 rows containing missing values (geom_path).
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state             name newValue oldValue
## 1    HI positiveIncrease    12469    12289
## Observations: 12,589
## Variables: 55
## $ date                        <date> 2020-10-14, 2020-10-14, 2020-10-14, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 11183, 167977, 95246, 0, 227635, 855072...
## $ probableCases               <dbl> NA, 20232, 5011, NA, 5043, NA, 5430, 24...
## $ negative                    <dbl> 505994, 1078476, 1083107, 1616, 1366610...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 517177, 1226221, 1173342, 1616, 1589202...
## $ hospitalizedCurrently       <dbl> 56, 834, 582, NA, 711, 3126, 385, 188, ...
## $ hospitalizedCumulative      <dbl> NA, 18635, 6148, NA, 20366, NA, 8003, 1...
## $ inIcuCurrently              <dbl> NA, NA, 241, NA, 163, 753, NA, NA, 24, ...
## $ inIcuCumulative             <dbl> NA, 1907, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 7, NA, 112, NA, 85, NA, NA, NA, 7, NA, ...
## $ onVentilatorCumulative      <dbl> NA, 1085, 755, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 5950, 74238, 85597, NA, 37586, NA, 6922...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "10/14/2020 03:59", "10/14/2020 11:00",...
## $ dateModified                <dttm> 2020-10-14 03:59:00, 2020-10-14 11:00:...
## $ checkTimeEt                 <chr> "10/13 23:59", "10/14 07:00", "10/13 20...
## $ death                       <dbl> 64, 2706, 1634, 0, 5772, 16639, 2009, 4...
## $ hospitalized                <dbl> NA, 18635, 6148, NA, 20366, NA, 8003, 1...
## $ dateChecked                 <dttm> 2020-10-14 03:59:00, 2020-10-14 11:00:...
## $ totalTestsViral             <dbl> 517177, 1226221, 1173342, 1616, NA, 164...
## $ positiveTestsViral          <dbl> 10109, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ negativeTestsViral          <dbl> 506755, NA, 1083107, NA, NA, NA, NA, NA...
## $ positiveCasesViral          <dbl> 11183, 147745, 90235, 0, 222592, 855072...
## $ deathConfirmed              <dbl> 64, 2549, 1484, NA, 5485, NA, NA, 3637,...
## $ deathProbable               <dbl> NA, 157, 150, NA, 287, NA, NA, 900, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1588632, NA, 44...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1589202, NA, 1032545, N...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 303875, NA, 174566, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 12313, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 162253, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 60687, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 31294, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 5537, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 144, 784, 1079, 0, 901, 2666, 1048, 164...
## $ negativeIncrease            <dbl> 2244, 4352, 9793, 0, 7263, 89104, 8218,...
## $ total                       <dbl> 517177, 1246453, 1178353, 1616, 1594245...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 2388, 5014, 10677, 0, 8051, 91770, 1730...
## $ posNeg                      <dbl> 517177, 1246453, 1178353, 1616, 1594245...
## $ deathIncrease               <dbl> 4, 41, 23, 0, 5, 58, 11, 4, 1, 1, 66, 1...
## $ hospitalizedIncrease        <dbl> 0, 195, 79, 0, 54, 0, 88, 0, 0, 0, 258,...
## $ hash                        <chr> "3e715172385e995a3ce6d9b2672d1b675141de...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        60253    825    NA    457677  1065
## 2 TRUE       7816029 207564    NA 117909065 11524
## Observations: 11,524
## Variables: 6
## $ date   <date> 2020-10-14, 2020-10-14, 2020-10-14, 2020-10-14, 2020-10-14,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 144, 784, 1079, 901, 2666, 1048, 164, 64, 71, 2883, 1297, 61...
## $ deaths <dbl> 4, 41, 23, 5, 58, 11, 4, 1, 1, 66, 16, 4, 12, 2, 48, 14, 67,...
## $ hosp   <dbl> 56, 834, 582, 711, 3126, 385, 188, 86, 116, 2155, 1705, 109,...
## $ tests  <dbl> 2388, 5014, 10677, 8051, 91770, 17303, 8376, 5181, 1071, 218...
## Observations: 11,524
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

## 
## Recency is defined as 2020-09-15 through current
## 
## Recency is defined as 2020-09-15 through current

# Create county-level segments from USA Facts data, downloading it when needed
caseLoc <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201015.csv"
deathLoc <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201015.csv"

# Read and process the county data through maxDate, building new clusters
# (createClusters=TRUE) while taking the state clusters from test_hier6_201014
# NOTE(review): minShape, ratioDeathvsCase, ratioTotalvsShape, minDeath and minCase
# are thresholds whose meaning is defined in readRunUSAFacts() in the sourced
# functions file; nCenters, testCenters, iter.max, nstart and seed are presumably
# the clustering controls - confirm there before changing any of them
cty_20201015 <- readRunUSAFacts(maxDate="2020-10-13", 
                                popLoc="./RInputFiles/Coronavirus/covid_county_population_usafacts.csv", 
                                caseLoc=caseLoc, 
                                deathLoc=deathLoc, 
                                # Download only when at least one local file is missing; both
                                # operands are length-one, so the scalar short-circuit && is used
                                dlCaseDeath=!(file.exists(caseLoc) && file.exists(deathLoc)),
                                oldFile=readFromRDS("burden_20200903_new"), 
                                existingStateClusters=test_hier6_201014$useClusters,
                                createClusters=TRUE, 
                                minShape=3,
                                ratioDeathvsCase = 5,
                                ratioTotalvsShape = 0.5,
                                minDeath=100,
                                minCase=5000,
                                nCenters=5,
                                testCenters=1:25,
                                iter.max=20,
                                nstart=10,
                                seed=2010151358
                                )
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0157              0          0.147          0.0559
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Cluster means and counts
##                  1     2      3      4      5
## .           277.00 73.00 240.00 529.00 472.00
## totalCases    0.68  1.47   1.22   0.90   0.50
## totalDeaths   2.32 10.02   5.13   1.91   0.59
## cases_3       0.02  0.05   0.01   0.01   0.01
## deaths_3      0.08  0.09   0.04   0.03   0.07
## cases_4       0.15  0.25   0.10   0.03   0.05
## deaths_4      1.34  1.48   0.67   0.23   0.56
## cases_5       0.14  0.13   0.10   0.04   0.06
## deaths_5      1.63  0.95   0.78   0.21   0.38
## cases_6       0.08  0.09   0.11   0.09   0.07
## deaths_6      0.65  0.51   0.53   0.29   0.38
## cases_7       0.15  0.20   0.26   0.25   0.17
## deaths_7      0.37  0.59   0.80   0.76   0.59
## cases_8       0.16  0.15   0.20   0.24   0.18
## deaths_8      0.37  0.79   1.07   1.41   0.55
## cases_9       0.17  0.09   0.15   0.23   0.23
## deaths_9      0.36  0.47   0.83   1.51   0.67
## cases_10      0.11  0.05   0.07   0.12   0.16
## deaths_10     0.20  0.14   0.29   0.54   0.60

## 
## Recency is defined as 2020-09-14 through current
## 
## Recency is defined as 2020-09-14 through current
## Warning: Removed 1 rows containing missing values (geom_point).

## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"

# Fetch the latest CDC all-cause deaths file and run the processing pipeline
cdcLoc <- "Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20201015.csv"

# dlData = TRUE requests a fresh download into the file named by cdcLoc; the
# state-level list test_hier6_201014 is passed through as lst
# NOTE(review): weekThru = 34 and cvDeathThru = "2020-08-22" look like the same
# cutoff expressed as a week number and as a date - confirm they stay in sync
cdcList_20201015 <- readRunCDCAllCause(
  loc = cdcLoc,
  startYear = 2015,
  curYear = 2020,
  weekThru = 34,
  startWeek = 9,
  lst = test_hier6_201014,
  epiMap = readFromRDS("epiMonth"),
  cvDeathThru = "2020-08-22",
  cdcPlotStartWeek = 10,
  agePopData = readFromRDS("usPopBucket2020"),
  dlData = TRUE,
  ovrWriteError = FALSE
)
## 
## Downloading CDC data from https://data.cdc.gov/api/views/y5bj-9g5w/rows.csv?accessType=DOWNLOAD to ./RInputFiles/Coronavirus/Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20201015.csv 
## Observations: 176,686
## Variables: 11
## $ Jurisdiction         <chr> "Alabama", "Alabama", "Alabama", "Alabama", "A...
## $ `Week Ending Date`   <chr> "01/10/2015", "01/17/2015", "01/24/2015", "01/...
## $ `State Abbreviation` <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"...
## $ Year                 <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ Week                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ `Age Group`          <chr> "25-44 years", "25-44 years", "25-44 years", "...
## $ `Number of Deaths`   <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50...
## $ `Time Period`        <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2...
## $ Type                 <chr> "Predicted (weighted)", "Predicted (weighted)"...
## $ Suppress             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 176,686
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <chr> "25-44 years", "25-44 years", "25-44 years", "25-44 ye...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2019", "2...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Check Control Levels and Record Counts for Renamed Data:
## # A tibble: 6 x 4
##   age                    n n_deaths_na   deaths
##   <chr>              <int>       <int>    <dbl>
## 1 25-44 years        26446           1  3226901
## 2 45-64 years        32314           4 12655629
## 3 65-74 years        32308           4 12546354
## 4 75-84 years        32327           5 15599787
## 5 85 years and older 32314           5 20352070
## 6 Under 25 years     20977           0  1392852
## # A tibble: 12 x 6
## # Groups:   period, year [6]
##    period     year type                     n n_deaths_na  deaths
##    <chr>     <int> <chr>                <int>       <int>   <dbl>
##  1 2015-2019  2015 Predicted (weighted) 15287           0 5416412
##  2 2015-2019  2015 Unweighted           15287           0 5416412
##  3 2015-2019  2016 Predicted (weighted) 15365           0 5483764
##  4 2015-2019  2016 Unweighted           15365           0 5483764
##  5 2015-2019  2017 Predicted (weighted) 15317           0 5643337
##  6 2015-2019  2017 Unweighted           15317           0 5643337
##  7 2015-2019  2018 Predicted (weighted) 15305           0 5698007
##  8 2015-2019  2018 Unweighted           15305           0 5698007
##  9 2015-2019  2019 Predicted (weighted) 15317           0 5725524
## 10 2015-2019  2019 Unweighted           15317           0 5725524
## 11 2020       2020 Predicted (weighted) 11773          13 4969185
## 12 2020       2020 Unweighted           11731           6 4870320
## # A tibble: 3 x 5
## # Groups:   period [2]
##   period    Suppress                                       n n_deaths_na  deaths
##   <chr>     <chr>                                      <int>       <int>   <dbl>
## 1 2015-2019 <NA>                                      153182           0  5.59e7
## 2 2020      Suppressed (counts highly incomplete, <5~     19          19  0.    
## 3 2020      <NA>                                       23485           0  9.84e6
## # A tibble: 10 x 5
## # Groups:   period [2]
##    period   Note                                           n n_deaths_na  deaths
##    <chr>    <chr>                                      <int>       <int>   <dbl>
##  1 2015-20~ <NA>                                      153182           0  5.59e7
##  2 2020     Data in recent weeks are incomplete. Onl~  18954           0  8.34e6
##  3 2020     Data in recent weeks are incomplete. Onl~    420           0  1.96e5
##  4 2020     Data in recent weeks are incomplete. Onl~    394          16  4.47e4
##  5 2020     Data in recent weeks are incomplete. Onl~   1362           3  2.99e5
##  6 2020     Data in recent weeks are incomplete. Onl~     12           0  5.54e3
##  7 2020     Estimates for Pennsylvania are too low f~     36           0  1.69e4
##  8 2020     Weights may be too low to account for un~    248           0  6.06e4
##  9 2020     Weights may be too low to account for un~     12           0  5.69e3
## 10 2020     <NA>                                        2066           0  8.77e5
##    state         Jurisdiction    n n_deaths_na   deaths
## 1     US        United States 3600           0 32767366
## 2     CA           California 3600           0  3119418
## 3     FL              Florida 3600           0  2395987
## 4     TX                Texas 3600           0  2358271
## 5     PA         Pennsylvania 3600           0  1571585
## 6     OH                 Ohio 3600           0  1416195
## 7     IL             Illinois 3600           0  1237033
## 8     NY             New York 3600           0  1171620
## 9     MI             Michigan 3599           0  1125676
## 10    NC       North Carolina 3536           8  1060294
## 11    GA              Georgia 3600           0   982315
## 12    NJ           New Jersey 3594           0   878338
## 13    TN            Tennessee 3600           0   855255
## 14    VA             Virginia 3600           0   785789
## 15    IN              Indiana 3598           0   760849
## 16    MO             Missouri 3597           0   740140
## 17    AZ              Arizona 3600           0   694914
## 18    MA        Massachusetts 3568           0   694218
## 19    YC        New York City 3596           0   678875
## 20    WA           Washington 3600           0   655215
## 21    AL              Alabama 3596           0   608067
## 22    WI            Wisconsin 3581           0   601074
## 23    MD             Maryland 3594           0   578852
## 24    SC       South Carolina 3598           0   570629
## 25    KY             Kentucky 3564           0   554152
## 26    LA            Louisiana 3590           0   534961
## 27    MN            Minnesota 3557           0   511091
## 28    CO             Colorado 3598           0   453533
## 29    OK             Oklahoma 3590           0   452162
## 30    OR               Oregon 3429           0   419882
## 31    MS          Mississippi 3533           0   369852
## 32    AR             Arkansas 3497           0   367683
## 33    CT          Connecticut 3155           8   361512
## 34    IA                 Iowa 3232           0   344962
## 35    PR          Puerto Rico 3317           0   336978
## 36    KS               Kansas 3292           0   301264
## 37    NV               Nevada 3339           0   293938
## 38    WV        West Virginia 3042           3   253845
## 39    UT                 Utah 3486           0   217243
## 40    NM           New Mexico 3180           0   208428
## 41    NE             Nebraska 2886           0   190958
## 42    ME                Maine 2686           0   162773
## 43    ID                Idaho 2804           0   155435
## 44    NH        New Hampshire 2708           0   137002
## 45    HI               Hawaii 2596           0   126342
## 46    RI         Rhode Island 2513           0   116090
## 47    MT              Montana 2592           0   111506
## 48    DE             Delaware 2599           0   101066
## 49    SD         South Dakota 2485           0    87752
## 50    ND         North Dakota 2467           0    76289
## 51    DC District of Columbia 2581           0    65035
## 52    VT              Vermont 2370           0    62844
## 53    WY              Wyoming 2356           0    48148
## 54    AK               Alaska 2385           0    42892

## Observations: 176,686
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 86,667
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Confirm that data suppressions and NA deaths have been aliminated:
## # A tibble: 0 x 11
## # ... with 11 variables: Jurisdiction <chr>, weekEnding <date>, state <chr>,
## #   year <fct>, week <int>, age <fct>, deaths <dbl>, period <fct>, type <chr>,
## #   Suppress <chr>, Note <chr>
## Observations: 81,511
## Variables: 12
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-10, 2015-01-10, 2015-01-10, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ...
## $ age          <fct> Under 25 years, 25-44 years, 45-64 years, 65-74 years,...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ n            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ deaths       <dbl> 25, 67, 253, 202, 272, 320, 28, 49, 256, 222, 253, 332...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## First duplicate is in row number (0 means no duplicates): 0

## Warning: Column `state` joining factor and character vector, coercing into
## character vector

## Joining, by = "state"

## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).

The process is designed to run in a stand-alone fashion: any data it needs can be loaded from the saved RDS files.

The process can also be run using existing data and segments:

# Use existing segments with different data (already downloaded)
locDownload <- "./RInputFiles/Coronavirus/CV_downloaded_201015.csv"

# Read the saved reference run from disk once; it supplies both the raw data
# used for the comparison checks and the existing state clusters
ref_hier5_201001 <- readFromRDS("test_hier5_201001")

# Process the already-downloaded file, comparing against the reference data and
# reusing its clusters rather than passing newly created ones
test_old_201014 <- readRunCOVIDTrackingProject(thruLabel="Oct 14, 2020", 
                                               readFrom=locDownload, 
                                               compareFile=ref_hier5_201001$dfRaw,
                                               useClusters=ref_hier5_201001$useClusters
                                               )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   state = col_character(),
##   dataQualityGrade = col_character(),
##   lastUpdateEt = col_character(),
##   dateModified = col_datetime(format = ""),
##   checkTimeEt = col_character(),
##   dateChecked = col_datetime(format = ""),
##   fips = col_character(),
##   totalTestResultsSource = col_character(),
##   hash = col_character(),
##   grade = col_logical()
## )
## See spec(...) for full column specifications.
## 
## File is unique by state and date
## 
## 
## Overall control totals in file:
## # A tibble: 1 x 3
##   positiveIncrease deathIncrease hospitalizedCurrently
##              <dbl>         <dbl>                 <dbl>
## 1          7876282        208389               8292248
## 
## *** COMPARISONS TO REFERENCE FILE: compareFile
## 
## Checkin for similarity of: column names
## In reference but not in current: 
## In current but not in reference: probableCases
## 
## Checkin for similarity of: states
## In reference but not in current: 
## In current but not in reference: 
## 
## Checkin for similarity of: dates
## In reference but not in current: 
## In current but not in reference: 2020-10-14 2020-10-13 2020-10-12 2020-10-11 2020-10-10 2020-10-09 2020-10-08 2020-10-07 2020-10-06 2020-10-05 2020-10-04 2020-10-03 2020-10-02 2020-10-01
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("date", "name")
## [1] date     name     newValue oldValue
## <0 rows> (or 0-length row.names)
## Joining, by = c("date", "name")
## Warning: Removed 14 rows containing missing values (geom_path).
## 
## 
## *** Difference of at least 5 and difference is at least 1%:
## Joining, by = c("state", "name")

##   state             name newValue oldValue
## 1    HI positiveIncrease    12469    12289
## Observations: 12,589
## Variables: 55
## $ date                        <date> 2020-10-14, 2020-10-14, 2020-10-14, 20...
## $ state                       <chr> "AK", "AL", "AR", "AS", "AZ", "CA", "CO...
## $ positive                    <dbl> 11183, 167977, 95246, 0, 227635, 855072...
## $ probableCases               <dbl> NA, 20232, 5011, NA, 5043, NA, 5430, 24...
## $ negative                    <dbl> 505994, 1078476, 1083107, 1616, 1366610...
## $ pending                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestResults            <dbl> 517177, 1226221, 1173342, 1616, 1589202...
## $ hospitalizedCurrently       <dbl> 56, 834, 582, NA, 711, 3126, 385, 188, ...
## $ hospitalizedCumulative      <dbl> NA, 18635, 6148, NA, 20366, NA, 8003, 1...
## $ inIcuCurrently              <dbl> NA, NA, 241, NA, 163, 753, NA, NA, 24, ...
## $ inIcuCumulative             <dbl> NA, 1907, NA, NA, NA, NA, NA, NA, NA, N...
## $ onVentilatorCurrently       <dbl> 7, NA, 112, NA, 85, NA, NA, NA, 7, NA, ...
## $ onVentilatorCumulative      <dbl> NA, 1085, 755, NA, NA, NA, NA, NA, NA, ...
## $ recovered                   <dbl> 5950, 74238, 85597, NA, 37586, NA, 6922...
## $ dataQualityGrade            <chr> "A", "A", "A+", "D", "A+", "B", "A", "B...
## $ lastUpdateEt                <chr> "10/14/2020 03:59", "10/14/2020 11:00",...
## $ dateModified                <dttm> 2020-10-14 03:59:00, 2020-10-14 11:00:...
## $ checkTimeEt                 <chr> "10/13 23:59", "10/14 07:00", "10/13 20...
## $ death                       <dbl> 64, 2706, 1634, 0, 5772, 16639, 2009, 4...
## $ hospitalized                <dbl> NA, 18635, 6148, NA, 20366, NA, 8003, 1...
## $ dateChecked                 <dttm> 2020-10-14 03:59:00, 2020-10-14 11:00:...
## $ totalTestsViral             <dbl> 517177, 1226221, 1173342, 1616, NA, 164...
## $ positiveTestsViral          <dbl> 10109, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ negativeTestsViral          <dbl> 506755, NA, 1083107, NA, NA, NA, NA, NA...
## $ positiveCasesViral          <dbl> 11183, 147745, 90235, 0, 222592, 855072...
## $ deathConfirmed              <dbl> 64, 2549, 1484, NA, 5485, NA, NA, 3637,...
## $ deathProbable               <dbl> NA, 157, 150, NA, 287, NA, NA, 900, NA,...
## $ totalTestEncountersViral    <dbl> NA, NA, NA, NA, NA, NA, 1588632, NA, 44...
## $ totalTestsPeopleViral       <dbl> NA, NA, NA, NA, 1589202, NA, 1032545, N...
## $ totalTestsAntibody          <dbl> NA, NA, NA, NA, 303875, NA, 174566, NA,...
## $ positiveTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 12313, NA, NA, ...
## $ negativeTestsAntibody       <dbl> NA, NA, NA, NA, NA, NA, 162253, NA, NA,...
## $ totalTestsPeopleAntibody    <dbl> NA, 60687, NA, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ negativeTestsPeopleAntibody <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ totalTestsPeopleAntigen     <dbl> NA, NA, 31294, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsPeopleAntigen  <dbl> NA, NA, 5537, NA, NA, NA, NA, NA, NA, N...
## $ totalTestsAntigen           <dbl> NA, NA, 21856, NA, NA, NA, NA, NA, NA, ...
## $ positiveTestsAntigen        <dbl> NA, NA, 3300, NA, NA, NA, NA, NA, NA, N...
## $ fips                        <chr> "02", "01", "05", "60", "04", "06", "08...
## $ positiveIncrease            <dbl> 144, 784, 1079, 0, 901, 2666, 1048, 164...
## $ negativeIncrease            <dbl> 2244, 4352, 9793, 0, 7263, 89104, 8218,...
## $ total                       <dbl> 517177, 1246453, 1178353, 1616, 1594245...
## $ totalTestResultsSource      <chr> "posNeg", "posNeg", "posNeg", "posNeg",...
## $ totalTestResultsIncrease    <dbl> 2388, 5014, 10677, 0, 8051, 91770, 1730...
## $ posNeg                      <dbl> 517177, 1246453, 1178353, 1616, 1594245...
## $ deathIncrease               <dbl> 4, 41, 23, 0, 5, 58, 11, 4, 1, 1, 66, 1...
## $ hospitalizedIncrease        <dbl> 0, 195, 79, 0, 54, 0, 88, 0, 0, 0, 258,...
## $ hash                        <chr> "3e715172385e995a3ce6d9b2672d1b675141de...
## $ commercialScore             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeRegularScore        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ negativeScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ positiveScore               <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ score                       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ grade                       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## 
## 
## Control totals - note that validState other than TRUE will be discarded
## 
## # A tibble: 2 x 6
##   validState   cases deaths  hosp     tests     n
##   <lgl>        <dbl>  <dbl> <dbl>     <dbl> <dbl>
## 1 FALSE        60253    825    NA    457677  1065
## 2 TRUE       7816029 207564    NA 117909065 11524
## Observations: 11,524
## Variables: 6
## $ date   <date> 2020-10-14, 2020-10-14, 2020-10-14, 2020-10-14, 2020-10-14,...
## $ state  <chr> "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", ...
## $ cases  <dbl> 144, 784, 1079, 901, 2666, 1048, 164, 64, 71, 2883, 1297, 61...
## $ deaths <dbl> 4, 41, 23, 5, 58, 11, 4, 1, 1, 66, 16, 4, 12, 2, 48, 14, 67,...
## $ hosp   <dbl> 56, 834, 582, 711, 3126, 385, 188, 86, 116, 2155, 1705, 109,...
## $ tests  <dbl> 2388, 5014, 10677, 8051, 91770, 17303, 8376, 5181, 1071, 218...
## Observations: 11,524
## Variables: 14
## $ date   <date> 2020-01-22, 2020-01-22, 2020-01-23, 2020-01-23, 2020-01-24,...
## $ state  <chr> "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", "MA", "WA", ...
## $ cases  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hosp   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tests  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
## $ cpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm    <dbl> 0.0000000, 0.0000000, 0.1471796, 0.0000000, 0.0000000, 0.000...
## $ cpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ dpm7   <dbl> NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ hpm7   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ tpm7   <dbl> NA, NA, NA, NA, NA, NA, 0.04205130, 0.00000000, 0.06307695, ...

## 
## Recency is defined as 2020-09-15 through current
## 
## Recency is defined as 2020-09-15 through current

A similar approach can be taken with the county data:

# Locations of the USA Facts case and death files (file names are dated 20201015)
caseLoc <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201015.csv"
deathLoc <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201015.csv"

# Run the county-level process on the newer data without creating new clusters (createClusters=FALSE)
# Previously saved burden data and state/county cluster assignments are read from RDS and passed in
old_20201015 <- readRunUSAFacts(
    maxDate="2020-10-13", 
    popLoc="./RInputFiles/Coronavirus/covid_county_population_usafacts.csv", 
    caseLoc=caseLoc, 
    deathLoc=deathLoc, 
    oldFile=readFromRDS("burden_20200903_new"), 
    existingStateClusters=readFromRDS("test_hier5_201001")$useClusters,
    existingCountyClusters=readFromRDS("clustVec_county_20200903_new"),
    createClusters=FALSE
)
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0157              0          0.147          0.0559
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Recency is defined as 2020-09-14 through current
## 
## Recency is defined as 2020-09-14 through current
## Warning: Removed 1 rows containing missing values (geom_point).

## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"

A similar approach can be taken with the CDC all-cause deaths data:

# CDC weekly all-cause deaths file that has previously been downloaded (file name is dated 20201015)
cdcLoc <- "Weekly_counts_of_deaths_by_jurisdiction_and_age_group_downloaded_20201015.csv"

# Read and analyze the CDC data; lst, epiMap, and agePopData are previously saved RDS objects
cdcList_old_20201015 <- readRunCDCAllCause(
    loc=cdcLoc, 
    startYear=2015, 
    curYear=2020,
    weekThru=34, 
    startWeek=9, 
    lst=readFromRDS("test_hier5_201001"), 
    epiMap=readFromRDS("epiMonth"), 
    agePopData=readFromRDS("usPopBucket2020"), 
    cvDeathThru="2020-08-22", 
    cdcPlotStartWeek=10
)
## Observations: 176,686
## Variables: 11
## $ Jurisdiction         <chr> "Alabama", "Alabama", "Alabama", "Alabama", "A...
## $ `Week Ending Date`   <chr> "01/10/2015", "01/17/2015", "01/24/2015", "01/...
## $ `State Abbreviation` <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL"...
## $ Year                 <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ Week                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ `Age Group`          <chr> "25-44 years", "25-44 years", "25-44 years", "...
## $ `Number of Deaths`   <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50...
## $ `Time Period`        <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2...
## $ Type                 <chr> "Predicted (weighted)", "Predicted (weighted)"...
## $ Suppress             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 176,686
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <int> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <chr> "25-44 years", "25-44 years", "25-44 years", "25-44 ye...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <chr> "2015-2019", "2015-2019", "2015-2019", "2015-2019", "2...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Check Control Levels and Record Counts for Renamed Data:
## # A tibble: 6 x 4
##   age                    n n_deaths_na   deaths
##   <chr>              <int>       <int>    <dbl>
## 1 25-44 years        26446           1  3226901
## 2 45-64 years        32314           4 12655629
## 3 65-74 years        32308           4 12546354
## 4 75-84 years        32327           5 15599787
## 5 85 years and older 32314           5 20352070
## 6 Under 25 years     20977           0  1392852
## # A tibble: 12 x 6
## # Groups:   period, year [6]
##    period     year type                     n n_deaths_na  deaths
##    <chr>     <int> <chr>                <int>       <int>   <dbl>
##  1 2015-2019  2015 Predicted (weighted) 15287           0 5416412
##  2 2015-2019  2015 Unweighted           15287           0 5416412
##  3 2015-2019  2016 Predicted (weighted) 15365           0 5483764
##  4 2015-2019  2016 Unweighted           15365           0 5483764
##  5 2015-2019  2017 Predicted (weighted) 15317           0 5643337
##  6 2015-2019  2017 Unweighted           15317           0 5643337
##  7 2015-2019  2018 Predicted (weighted) 15305           0 5698007
##  8 2015-2019  2018 Unweighted           15305           0 5698007
##  9 2015-2019  2019 Predicted (weighted) 15317           0 5725524
## 10 2015-2019  2019 Unweighted           15317           0 5725524
## 11 2020       2020 Predicted (weighted) 11773          13 4969185
## 12 2020       2020 Unweighted           11731           6 4870320
## # A tibble: 3 x 5
## # Groups:   period [2]
##   period    Suppress                                       n n_deaths_na  deaths
##   <chr>     <chr>                                      <int>       <int>   <dbl>
## 1 2015-2019 <NA>                                      153182           0  5.59e7
## 2 2020      Suppressed (counts highly incomplete, <5~     19          19  0.    
## 3 2020      <NA>                                       23485           0  9.84e6
## # A tibble: 10 x 5
## # Groups:   period [2]
##    period   Note                                           n n_deaths_na  deaths
##    <chr>    <chr>                                      <int>       <int>   <dbl>
##  1 2015-20~ <NA>                                      153182           0  5.59e7
##  2 2020     Data in recent weeks are incomplete. Onl~  18954           0  8.34e6
##  3 2020     Data in recent weeks are incomplete. Onl~    420           0  1.96e5
##  4 2020     Data in recent weeks are incomplete. Onl~    394          16  4.47e4
##  5 2020     Data in recent weeks are incomplete. Onl~   1362           3  2.99e5
##  6 2020     Data in recent weeks are incomplete. Onl~     12           0  5.54e3
##  7 2020     Estimates for Pennsylvania are too low f~     36           0  1.69e4
##  8 2020     Weights may be too low to account for un~    248           0  6.06e4
##  9 2020     Weights may be too low to account for un~     12           0  5.69e3
## 10 2020     <NA>                                        2066           0  8.77e5
##    state         Jurisdiction    n n_deaths_na   deaths
## 1     US        United States 3600           0 32767366
## 2     CA           California 3600           0  3119418
## 3     FL              Florida 3600           0  2395987
## 4     TX                Texas 3600           0  2358271
## 5     PA         Pennsylvania 3600           0  1571585
## 6     OH                 Ohio 3600           0  1416195
## 7     IL             Illinois 3600           0  1237033
## 8     NY             New York 3600           0  1171620
## 9     MI             Michigan 3599           0  1125676
## 10    NC       North Carolina 3536           8  1060294
## 11    GA              Georgia 3600           0   982315
## 12    NJ           New Jersey 3594           0   878338
## 13    TN            Tennessee 3600           0   855255
## 14    VA             Virginia 3600           0   785789
## 15    IN              Indiana 3598           0   760849
## 16    MO             Missouri 3597           0   740140
## 17    AZ              Arizona 3600           0   694914
## 18    MA        Massachusetts 3568           0   694218
## 19    YC        New York City 3596           0   678875
## 20    WA           Washington 3600           0   655215
## 21    AL              Alabama 3596           0   608067
## 22    WI            Wisconsin 3581           0   601074
## 23    MD             Maryland 3594           0   578852
## 24    SC       South Carolina 3598           0   570629
## 25    KY             Kentucky 3564           0   554152
## 26    LA            Louisiana 3590           0   534961
## 27    MN            Minnesota 3557           0   511091
## 28    CO             Colorado 3598           0   453533
## 29    OK             Oklahoma 3590           0   452162
## 30    OR               Oregon 3429           0   419882
## 31    MS          Mississippi 3533           0   369852
## 32    AR             Arkansas 3497           0   367683
## 33    CT          Connecticut 3155           8   361512
## 34    IA                 Iowa 3232           0   344962
## 35    PR          Puerto Rico 3317           0   336978
## 36    KS               Kansas 3292           0   301264
## 37    NV               Nevada 3339           0   293938
## 38    WV        West Virginia 3042           3   253845
## 39    UT                 Utah 3486           0   217243
## 40    NM           New Mexico 3180           0   208428
## 41    NE             Nebraska 2886           0   190958
## 42    ME                Maine 2686           0   162773
## 43    ID                Idaho 2804           0   155435
## 44    NH        New Hampshire 2708           0   137002
## 45    HI               Hawaii 2596           0   126342
## 46    RI         Rhode Island 2513           0   116090
## 47    MT              Montana 2592           0   111506
## 48    DE             Delaware 2599           0   101066
## 49    SD         South Dakota 2485           0    87752
## 50    ND         North Dakota 2467           0    76289
## 51    DC District of Columbia 2581           0    65035
## 52    VT              Vermont 2370           0    62844
## 53    WY              Wyoming 2356           0    48148
## 54    AK               Alaska 2385           0    42892

## Observations: 176,686
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## Observations: 86,667
## Variables: 11
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-17, 2015-01-24, 2015-01-31, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,...
## $ age          <fct> 25-44 years, 25-44 years, 25-44 years, 25-44 years, 25...
## $ deaths       <dbl> 67, 49, 55, 59, 47, 59, 41, 47, 59, 57, 54, 50, 58, 42...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## Confirm that data suppressions and NA deaths have been aliminated:
## # A tibble: 0 x 11
## # ... with 11 variables: Jurisdiction <chr>, weekEnding <date>, state <chr>,
## #   year <fct>, week <int>, age <fct>, deaths <dbl>, period <fct>, type <chr>,
## #   Suppress <chr>, Note <chr>
## Observations: 81,511
## Variables: 12
## $ Jurisdiction <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",...
## $ weekEnding   <date> 2015-01-10, 2015-01-10, 2015-01-10, 2015-01-10, 2015-...
## $ state        <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", ...
## $ year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, ...
## $ week         <int> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, ...
## $ age          <fct> Under 25 years, 25-44 years, 45-64 years, 65-74 years,...
## $ period       <fct> 2015-2019, 2015-2019, 2015-2019, 2015-2019, 2015-2019,...
## $ type         <chr> "Predicted (weighted)", "Predicted (weighted)", "Predi...
## $ Suppress     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ n            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ deaths       <dbl> 25, 67, 253, 202, 272, 320, 28, 49, 256, 222, 253, 332...
## $ Note         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## 
## First duplicate is in row number (0 means no duplicates): 0

## Warning: Column `state` joining factor and character vector, coercing into
## character vector

## Joining, by = "state"

## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_text).

The evolution of the county-level segments from September 3 to October 15 can also be explored. Since the process is run using kmeans, segment numbers have no inherent meaning:

# Combine the previous and current county cluster assignments, one row per county
# NOTE(review): both cluster objects are assumed to be vectors named by county FIPS - confirm upstream
ctyChange <- local({
    
    # Fetch each cluster vector once (temporaries stay inside local() and do not reach the workspace)
    prevClust <- readFromRDS("clustVec_county_20200903_new")
    currClust <- cty_20201015$clustVec
    
    # mism flags positions where the names differ; fips is padded to 5 characters for the join
    tibble::tibble(fips1=names(prevClust), 
                   fips2=names(currClust), 
                   cluster1=prevClust, 
                   cluster2=currClust
                   ) %>%
        mutate(mism=(fips1!=fips2), 
               fips=stringr::str_pad(fips1, side="left", pad="0", width=5)
               ) %>%
        left_join(usmap::countypop, by="fips")
    
})

# Control totals: mismatched names, counties with no match in usmap::countypop, and total population
ctyChange %>%
    summarize(sumMismatch=sum(mism), 
              sumna=sum(is.na(county)), 
              sumpop=sum(pop_2015)
              )
## # A tibble: 1 x 3
##   sumMismatch sumna    sumpop
##         <int> <int>     <dbl>
## 1           0     0 301897023
# Change in segments: county count and population for each old/new cluster pair, shown as labelled tiles
ctyChange %>%
    group_by(cluster1, cluster2) %>%
    summarize(n=n(), pop=sum(pop_2015)) %>%
    mutate(tileLabel=paste0("n=", n, "\nPop. ", round(pop/1000000, 1), " mln")) %>%
    ggplot(aes(x=factor(cluster2), y=factor(cluster1))) + 
    geom_tile(aes(fill=pop)) + 
    geom_text(aes(label=tileLabel)) + 
    scale_fill_continuous(name="Population", low="white", high="lightgreen") + 
    labs(title="Evolution of County Clusters", 
         x="New Cluster (Oct 15)", 
         y="Old Cluster (Sep 3)"
         )

There has been some meaningful evolution of the county-level clusters, particularly for the small cluster with a large disease burden. The totals are examined:

# Old cluster(s) containing the fewest counties
# If two or more clusters tie for the minimum count, smallCluster holds all of them
smallCluster <- ctyChange %>%
    count(cluster1) %>%
    filter(n==min(n)) %>%
    pull(cluster1)

# Counties from the smallest old cluster(s), ordered by new cluster and then by descending population
# %in% is used rather than == so that a tie (smallCluster of length > 1) cannot silently recycle
segChange <- ctyChange %>%
    filter(cluster1 %in% smallCluster) %>%
    arrange(cluster2, desc(pop_2015)) %>%
    select(-fips1, -fips2, -mism)

# Show the counties with population of 250,000 or more, converted so it prints as a base data frame
segChange %>%
    filter(pop_2015 >= 250000) %>%
    as.data.frame()
##    cluster1 cluster2  fips abbr             county pop_2015
## 1         3        2 36047   NY       Kings County  2636735
## 2         3        2 36081   NY      Queens County  2339150
## 3         3        2 26163   MI       Wayne County  1759335
## 4         3        2 36061   NY    New York County  1644518
## 5         3        2 36005   NY       Bronx County  1455444
## 6         3        2 36059   NY      Nassau County  1361350
## 7         3        2 36119   NY Westchester County   976396
## 8         3        2 09001   CT   Fairfield County   948053
## 9         3        2 34003   NJ      Bergen County   938506
## 10        3        2 09003   CT    Hartford County   895841
## 11        3        2 34023   NJ   Middlesex County   840900
## 12        3        2 34013   NJ       Essex County   797434
## 13        3        2 25009   MA       Essex County   776043
## 14        3        2 25021   MA     Norfolk County   696023
## 15        3        2 34017   NJ      Hudson County   674836
## 16        3        2 34029   NJ       Ocean County   588721
## 17        3        2 34039   NJ       Union County   555786
## 18        3        2 34031   NJ     Passaic County   510916
## 19        3        2 25023   MA    Plymouth County   510393
## 20        3        2 34027   NJ      Morris County   499509
## 21        3        2 36085   NY    Richmond County   474558
## 22        3        2 25013   MA     Hampden County   470690
## 23        3        2 48061   TX     Cameron County   422156
## 24        3        2 22071   LA     Orleans Parish   389617
## 25        3        2 34021   NJ      Mercer County   371398
## 26        3        2 34035   NJ    Somerset County   333654
## 27        3        2 36087   NY    Rockland County   326037
## 28        3        3 36103   NY     Suffolk County  1501587
## 29        3        3 25025   MA     Suffolk County   778121
## 30        3        3 34025   NJ    Monmouth County   628715
# Map the new segment for counties of 100,000+ population that were in the small old cluster
# Counties with no value are filled white
segChange %>%
    mutate(cluster2=factor(cluster2)) %>%
    filter(pop_2015 >= 100000) %>%
    usmap::plot_usmap(data=., values="cluster2", regions="counties") + 
    scale_fill_discrete(name="New segment", na.value="white")

# Cumulative deaths per million over time, for each combination of old and new county cluster
# The FIPS code is padded to 5 characters so that it matches the fips key in ctyChange
cty_20201015$burdenData %>%
    select(countyFIPS, date, cumDeaths, population, countyName=county) %>%
    mutate(countyFIPS=stringr::str_pad(countyFIPS, side="left", width=5, pad="0")) %>%
    inner_join(ctyChange %>% select(fips, cluster1, cluster2), by=c("countyFIPS"="fips")) %>%
    group_by(cluster1, cluster2, date) %>%
    summarize(cumDeaths=sum(cumDeaths), population=sum(population)) %>%
    ggplot(aes(x=date, y=1000000*cumDeaths/population)) + 
    geom_line(aes(color=factor(cluster2))) + 
    facet_grid(rows=vars(paste0("New ", cluster2)), cols=vars(paste0("Old ", cluster1))) + 
    labs(title="Deaths per million by segment", 
         subtitle="Old segment (03 SEP) and new segment (15 OCT)", 
         x="Date", 
         y="Cumulative Deaths per million"
         )

The segmenting methodology puts a much higher weight on total burden than on the shape of the curve. As a result, as some (mostly southern) counties have seen significant burden increases, the segments have reorganized to reflect that.

The next step is to rebuild the county clusters using a higher weight on the shape of the curve. It would be useful to have distinct early/high and late/high segments, similar to what is seen with the states. The county patterns appear potentially similar to the state patterns:

  • Early and high
  • Early and moderate
  • Late and high
  • Late and moderate
  • Persistently moderate
  • Persistently low

The county clustering process is updated to allow for hierarchical clustering, as well as for rules-based clustering driven by simple heuristics (high, medium, low; early, sustained, late):

# Function to run the USA Facts (US county-level coronavirus data) clustering process
readRunUSAFacts <- function(maxDate, 
                            popLoc, 
                            caseLoc, 
                            deathLoc, 
                            dlPop=FALSE, 
                            dlCaseDeath=FALSE, 
                            ovrWrite=FALSE, 
                            ovrWriteError=TRUE, 
                            oldFile=NULL, 
                            showBurdenMinPop=10000, 
                            minPopCluster=25000,
                            existingStateClusters=NULL, 
                            existingCountyClusters=NULL, 
                            createClusters=FALSE, 
                            hierarchical=FALSE,
                            kCut=6,
                            ...
                            ) {
    
    # FUNCTION ARGUMENTS:
    # maxDate: the maximum date to use for data from the cases and deaths file
    # popLoc: location where the county-level population data are stored
    # caseLoc: location where the county-level cases data are stored
    # deathLoc: location where the county-level deaths data are stored
    # dlPop: boolean, should new population data be downloaded to popLoc
    # dlCaseDeath: boolean, should new case data and death data be downloaded to caseLoc and deathLoc
    # ovrWrite: boolean, if data are downloaded to an existing file, should it be over-written
    # ovrWriteError: boolean, if ovrWrite is FALSE and an attempt to overwrite is made, should it error out?
    # oldFile: old file for comparing metrics against (NULL means no old file for comparisons)
    # showBurdenMinPop: minimum population for showing in burden by cluster plots (NULL means skip plot)
    # minPopCluster: minimum population for including county in running cluster-level metrics
    # existingStateClusters: location of an existing named vector with clusters by state (NULL means none)
    # existingCountyClusters: location of an existing named vector with clusters by county (NULL means none)
    #                         if existingStateClusters is not NULL, then existingCountyClusters is ignored
    # createClusters: boolean, whether to create new clusters (method is selected by hierarchical)
    #                 if FALSE, existingCountyClusters is used as the county cluster vector
    # hierarchical: whether to create hierarchical clusters
    #               TRUE means run hierarchical clustering
    #               FALSE means run kmeans clustering
    #               NA means run rules-based clustering
    # kCut: if hierarchical clustering is used, what k (number of clusters in cutree) should be used?
    # ...: other arguments that will be passed to prepClusterCounties
    #
    # RETURNS: a list with pop, burdenData, clusterData, clustVec, helperACC_county, 
    #          clusterStateData, and maxDate
    
    # STEP 0: Download new files (if requested)
    urlCase <- "https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_confirmed_usafacts.csv"
    urlDeath <- "https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv"
    urlPop <- "https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_county_population_usafacts.csv"
    
    # Helper function to download a file
    # The overwrite settings are passed in explicitly under different names; a default of the form
    # ovrWrite=ovrWrite refers to itself and fails with 'promise already under evaluation' when used
    helperDownload <- function(url, loc, allowOverwrite, errorOnExisting) {
        # If the file exists, mention it and proceed as per the guard checks
        if (file.exists(loc)) {
            cat("\nFile:", loc, "already exists\n")
            if (!allowOverwrite && errorOnExisting) {
                stop("\nExiting due to ovrWrite=FALSE and ovrWriteError=TRUE\n")
            }
            if (!allowOverwrite && !errorOnExisting) {
                cat("\nFile is NOT downloaded again\n")
                return(NULL)
            }
        }
        # Download the file and change to read-only
        download.file(url, destfile=loc, method="curl")
        Sys.chmod(loc, mode="0555", use_umask = FALSE)
    }
    
    if (dlPop) {
        helperDownload(urlPop, loc=popLoc, allowOverwrite=ovrWrite, errorOnExisting=ovrWriteError)
    }
    if (dlCaseDeath) {
        helperDownload(urlCase, loc=caseLoc, allowOverwrite=ovrWrite, errorOnExisting=ovrWriteError)
        helperDownload(urlDeath, loc=deathLoc, allowOverwrite=ovrWrite, errorOnExisting=ovrWriteError)
    }
    
    # STEP 1: Read in the population file
    pop <- readr::read_csv(popLoc) %>%
        rename(countyName=`County Name`, state=State)
    
    # STEP 2: Read case and death data, combine, and add population totals and existing clusters
    burdenData <- readUSAFacts(
        caseFile=caseLoc, 
        deathFile=deathLoc, 
        countyPopFile=pop,
        oldFile=oldFile,
        showBurdenMinPop=showBurdenMinPop,
        maxDate=maxDate,
        stateClusters=existingStateClusters, 
        countyClusters=existingCountyClusters, 
        glimpseRaw=FALSE
    )
    
    # STEP 3: Create appropriately filtered data, and new clusters if requested
    clusterData <- prepClusterCounties(burdenFile=burdenData, 
                                       maxDate=maxDate, 
                                       minPop=minPopCluster,
                                       createClusters=createClusters, 
                                       hierarchical=hierarchical, 
                                       returnList=TRUE,
                                       ...
    )
    
    # STEP 4: Assess clusters against the new data
    # STEP 4a: Extract the county-level clusters (new clusters if created, existing otherwise)
    # The clustering object is nested one level down since prepClusterCounties is run with returnList=TRUE
    if (createClusters) {
        if (is.na(hierarchical)) clustVec <- clusterData$objCluster$objCluster
        else if (hierarchical) clustVec <- cutree(clusterData$objCluster$objCluster, k=kCut)
        else clustVec <- clusterData$objCluster$objCluster$cluster
    }
    else {
        clustVec <- existingCountyClusters
    }
    
    # STEP 4b: Show the cumulative data, order by cluster, and keep the plots together
    helperACC_county <- helperAssessCountyClusters(vecCluster=clustVec, 
                                                   dfPop=clusterData$countyFiltered, 
                                                   dfBurden=clusterData$countyFiltered, 
                                                   showCum=TRUE,
                                                   thruLabel=format(as.Date(maxDate), "%b %d, %Y"), 
                                                   plotsTogether=TRUE, 
                                                   orderCluster=TRUE
    )
    
    # STEP 5: Add back clusters not used for analysis (code 999) and associated disease data
    # May want to change the approach to population data
    clusterStateData <- helperMakeClusterStateData(dfPlot=helperACC_county, 
                                                   dfPop=usmap::countypop,
                                                   dfBurden=clusterData$countyDailyPerCapita,
                                                   orderCluster=TRUE
    )
    
    # STEP 6: Return a list of the key files
    list(pop=pop, 
         burdenData=burdenData, 
         clusterData=clusterData, 
         clustVec=clustVec, 
         helperACC_county=helperACC_county, 
         clusterStateData=clusterStateData,
         maxDate=maxDate
    )
    
}



# STEP 4: Create county-level clusters
prepClusterCounties <- function(burdenFile, 
                                maxDate,
                                minPop, 
                                createClusters=TRUE,
                                hierarchical=FALSE, 
                                returnList=!hierarchical, 
                                ...
                                ) {
    
    # FUNCTION ARGUMENTS:
    # burdenFile: the pivoted file containing the burdens data
    # maxDate: the latest date to be used from the data
    # minPop: the smallest population for a county to be included
    # createClusters: boolean, whether to create county-level clusters (if FALSE, object returned as NULL)
    #                 FALSE would be for prepping and converting data only (those objects are returned)
    # hierarchical: whether to create hierarchical clusters
    #               TRUE means run hierarchical clustering
    #               FALSE means run kmeans clustering
    #               NA means run rules-based clustering
    # returnList: whether the clustering call returns a list (only set up for TRUE as of now)
    # ...: other arguments to be passed to clusterStates
    #
    # RETURNS: a list with objCluster (NULL unless createClusters is TRUE), countyFiltered, 
    #          countyDailyPerCapita, and countyCumPerCapita
    
    # STEP 1: Select only desired variables from burden file
    # The county FIPS is renamed to 'state' so that the data can be passed to clusterStates
    countyCumPerCapita <- burdenFile %>%
        select(state=countyFIPS, date, cpm=cumCasesPer, dpm=cumDeathPer, population) %>%
        arrange(state, date)
    
    # STEP 2: Confirm that there are no duplicates and that every county has the same dates
    # Results are checked explicitly, since a bare expression inside a function is silently discarded
    # This should be 1 provided that there are no duplicates
    maxPerKey <- countyCumPerCapita %>% 
        count(state, date) %>% 
        pull(n) %>% 
        max()
    if (isTRUE(maxPerKey > 1)) {
        warning("Burden data has up to ", maxPerKey, " records for a single county-date; ", 
                "daily differences may be unreliable", 
                call.=FALSE
                )
    }
    
    # Record count and total population should be the same on every date
    totalsByDate <- countyCumPerCapita %>%
        mutate(n=1) %>%
        group_by(date) %>%
        summarize(n=sum(n), population=sum(population))
    if (length(unique(totalsByDate$n)) > 1 || length(unique(totalsByDate$population)) > 1) {
        warning("Record count and/or total population is not the same on every date in the burden data", 
                call.=FALSE
                )
    }
    
    # STEP 3: Convert to daily new totals rather than cumulative data
    # The first record for each county keeps its cumulative value since there is no prior day to subtract
    countyDailyPerCapita <- countyCumPerCapita %>%
        group_by(state) %>%
        arrange(date) %>%
        mutate_at(vars(all_of(c("cpm", "dpm"))), ~ifelse(row_number()==1, ., .-lag(.))) %>%
        ungroup()
    
    # STEP 4: Add rolling 7 aggregates and total cases/deaths
    countyDailyPerCapita <- countyDailyPerCapita %>%
        arrange(state, date) %>%
        group_by(state) %>%
        helperRollingAgg(origVar="cpm", newName="cpm7", k=7) %>%
        helperRollingAgg(origVar="dpm", newName="dpm7", k=7) %>%
        ungroup() %>%
        mutate(cases=cpm*population/1000000, deaths=dpm*population/1000000)
    
    # STEP 5: Filter the data prior to clustering
    countyFiltered <- countyDailyPerCapita %>%
        filter(population >= minPop, date <= as.Date(maxDate)) %>%
        mutate(state=as.character(state))
    
    # STEP 6: Check the implications of the filtering
    # Check number of counties that will fail the test for 100 deaths per million or 5000 cases per million
    is0 <- function(x) mean(x==0)
    isltn <- function(x, n) mean(x<n)
    islt100 <- function(x) isltn(x, n=100)
    islt5000 <- function(x) isltn(x, n=5000)
    
    # Proportion of counties whose total cpm/dpm is 0, under 100, or under 5000 (printed for review)
    countyFiltered %>% 
        group_by(state) %>% 
        summarize_at(c("cpm", "dpm"), sum) %>% 
        ungroup() %>%
        summarize_at(vars(all_of(c("cpm", "dpm"))), 
                     .funs=list(mean_is0=is0, mean_lt100=islt100, mean_lt5000=islt5000)
        ) %>%
        print()
    
    # Run county-level clusters if requested, otherwise store as NULL
    objCluster <- if(createClusters) {
        clusterStates(countyFiltered, hierarchical=hierarchical, returnList=returnList, ...)
    } else {
        NULL
    }
    
    # Return all of the relevant objects
    list(objCluster=objCluster, 
         countyFiltered=countyFiltered, 
         countyDailyPerCapita=countyDailyPerCapita, 
         countyCumPerCapita=countyCumPerCapita
    )
    
}



# Function to create clusters for the state data (requires all data from same year, as currently true)
# With the default shapeFunc (lubridate::month) dates are bucketed by calendar month only, so the same 
# month from different years would be pooled in to a single time bucket
clusterStates <- function(df, 
                          caseVar="cpm", 
                          deathVar="dpm",
                          shapeFunc=lubridate::month, 
                          minShape=NULL, 
                          minDeath=0,
                          maxDeath=Inf,
                          minCase=0,
                          maxCase=Inf,
                          ratioTotalvsShape=1, 
                          ratioDeathvsCase=1, 
                          hierarchical=TRUE, 
                          hierMethod="complete", 
                          nCenters=3, 
                          iter.max=10,
                          nstart=1,
                          testCenters=NULL,
                          returnList=FALSE, 
                          seed=NULL
                          ) {
    
    # FUNCTION ARGUMENTS:
    # df: the data frame containing cases and deaths data (must contain 'date', 'state', caseVar, deathVar)
    # caseVar: the variable containing the cases per capita data
    # deathVar: the variable containing the deaths per capita data
    # shapeFunc: the function to be used for creating the shape of the curve (applied to 'date')
    # minShape: the minimum value to be used for shape (to avoid very small amounts of data in Jan/Feb)
    #           NULL means keep everything
    # minDeath: use this value as a floor for the death metric when calculating shape
    # maxDeath: use this value as a maximum when calculating distance using deaths 
    # minCase: use this value as a floor for the case metric when calculating shape
    # maxCase: use this value as a maximum when calculating distance using cases 
    # ratioTotalvsShape: amount of standard deviation to be kept in total variable vs shape variables
    # ratioDeathvsCase: amount of standard deviation to be kept in deaths vs cases 
    #                   (total death data will be scaled to have sd this many times higher than cases)
    #                   (death percentages by time period will be scaled directly by this amount)
    # hierarchical: whether to create hierarchical clusters
    #               TRUE means run hierarchical clustering
    #               FALSE means run kmeans clustering
    #               NA means run rules-based clustering
    #                  (3-center kmeans on the totals crossed with 2-center kmeans on the shape)
    # hierMethod: the method for hierarchical clustering (e.g., 'complete' or 'single')
    # nCenters: the number of centers to use for kmeans clustering (hierarchical=FALSE only)
    # testCenters: integer vector of centers to test (will create an elbow plot); NULL means do not test
    # iter.max: maximum number of kmeans iterations (default in kmeans algorithm is 10)
    #           used only when hierarchical=FALSE
    # nstart: number of random sets chosen for kmeans (default in kmeans algorithm is 1)
    #         used only when hierarchical=FALSE
    # returnList: boolean, if FALSE just the cluster object is returned
    #                      if TRUE, a list is returned with dfCluster and the cluster object
    # seed: set the seed to this value (NULL means no seed); not used for hierarchical clustering
    #
    # RETURNS:
    # the clustering object (hclust object, kmeans object, or a named integer vector of segments 1-6 for 
    # the rules-based approach), or list(objCluster=, dfCluster=) when returnList is TRUE
    # SIDE EFFECTS: plots the dendrogram (hierarchical) or prints cluster sizes and centers (kmeans)
    
    # Extract key information (aggregates and by shapeFunc for each state)
    df <- df %>%
        select_at(vars(all_of(c("date", "state", caseVar, deathVar)))) %>%
        purrr::set_names(c("date", "state", "cases", "deaths")) %>%
        mutate(timeBucket=shapeFunc(date)) %>%
        group_by(state, timeBucket) %>%
        summarize(cases=sum(cases), deaths=sum(deaths)) %>%
        ungroup()
    
    # Limit to only relevant time buckets if requested
    if (!is.null(minShape)) {
        df <- df %>%
            filter(timeBucket >= minShape)
    }
    
    # Extract an aggregate by state, capped at maxCase/maxDeath
    # Deaths are then rescaled so that sd(totalDeaths) is ratioDeathvsCase times sd(totalCases)
    dfAgg <- df %>%
        group_by(state) %>%
        summarize(totalCases=sum(cases), totalDeaths=sum(deaths)) %>%
        mutate(totalCases=pmin(totalCases, maxCase), totalDeaths=pmin(totalDeaths, maxDeath)) %>%
        ungroup() %>%
        mutate(totalDeaths=ratioDeathvsCase*totalDeaths*sd(totalCases)/sd(totalDeaths))
    
    # Extract the percentages (shapes) by time bucket, scaled so that they have the proper ratio
    # The denominator is floored at minDeath/minCase so that low-burden states do not get a full-size shape
    # id_cols is passed by name since newer tidyr versions place '...' directly after the data argument
    dfShape <- df %>%
        pivot_longer(-c(state, timeBucket)) %>%
        group_by(state, name) %>%
        mutate(tot=pmax(sum(value), ifelse(name=="deaths", minDeath, minCase)), 
               value=ifelse(name=="deaths", ratioDeathvsCase, 1) * value / tot) %>%
        select(-tot) %>%
        pivot_wider(id_cols=state, names_from=c(name, timeBucket), values_from=value) %>%
        ungroup()
    
    # Function to calculate the sum of the column-wise SD for everything except 'state'
    calcSumSD <- function(df) {
        df %>% 
            ungroup() %>% 
            select(-state) %>% 
            summarize_all(.funs=sd) %>% 
            as.vector() %>% 
            sum()
    }
    
    # Down-weight the aggregate data so that there is the proper sum of sd in aggregates and shapes
    aggSD <- calcSumSD(dfAgg)
    shapeSD <- calcSumSD(dfShape)
    dfAgg <- dfAgg %>%
        mutate_if(is.numeric, ~. * ratioTotalvsShape * shapeSD / aggSD)
    
    # Combine so there is one row per state
    dfCluster <- dfAgg %>%
        inner_join(dfShape, by="state")
    
    # convert 'state' to rowname (so that dist/hclust/kmeans carry the state labels)
    keyData <- dfCluster %>% column_to_rownames("state")
    
    # Create rules-based segments, hierarchical segments, or kmeans segments
    if (is.na(hierarchical)) {
        # Create pseudo-rules-based segments
        if (!is.null(seed)) set.seed(seed)
        # STEP 1: Classify high-medium-low based on deaths and cases
        hml <- kmeans(select(keyData, starts_with("total")), centers=3)
        # STEP 2: Classify early-late based on shape
        esl <- kmeans(select(keyData, -starts_with("total")), centers=2)
        # STEP 3: Create a final segment (1-6) as the cross of the 3 burden groups and 2 shape groups
        objCluster <- 2*(hml$cluster-1) + esl$cluster
    } else if (hierarchical) {
        # Create hierarchical segments
        objCluster <-  hclust(dist(keyData), method=hierMethod)
        plot(objCluster)
    } else {
        # Create k-means segments
        # Create an elbow plot if testCenters is not NULL
        if (!is.null(testCenters)) {
            helperElbow(keyData, testCenters=testCenters, iter.max=iter.max, nstart=nstart, silhouette=TRUE)
        }
        # Create the kmeans cluster object, setting a seed if requested
        if (!is.null(seed)) set.seed(seed)
        objCluster <- kmeans(keyData, centers=nCenters, iter.max=iter.max, nstart=nstart)
        cat("\nCluster means and counts\n")
        # Print one column per cluster, with the cluster size labelled 'n' above the cluster centers
        cbind(n=objCluster$size, objCluster$centers) %>% round(2) %>% t() %>% print()
    }
    
    # Return the data and object as a list if returnList is TRUE, otherwise return only the clustering object
    if (!isTRUE(returnList)) {
        objCluster
    } else {
        list(objCluster=objCluster, dfCluster=dfCluster)
    }
    
}



# Create segments and download data from USA Facts
caseLoc <- "./RInputFiles/Coronavirus/covid_confirmed_usafacts_downloaded_20201015.csv"
deathLoc <- "./RInputFiles/Coronavirus/covid_deaths_usafacts_downloaded_20201015.csv"

# Create 3x2 clusters for hml and shape
# hierarchical=NA requests the rules-based segments; ratioDeathvsCase=5 weights deaths above cases
# dlCaseDeath is TRUE unless both files already exist (scalar test, so && rather than &)
cty_20201015_rule6 <- readRunUSAFacts(maxDate="2020-10-13", 
                                      popLoc="./RInputFiles/Coronavirus/covid_county_population_usafacts.csv", 
                                      caseLoc=caseLoc, 
                                      deathLoc=deathLoc, 
                                      dlCaseDeath=!(file.exists(caseLoc) && file.exists(deathLoc)),
                                      oldFile=readFromRDS("burden_20200903_new"), 
                                      existingStateClusters=test_hier6_201014$useClusters,
                                      createClusters=TRUE, 
                                      hierarchical=NA,
                                      minShape=3,
                                      ratioDeathvsCase = 5,
                                      ratioTotalvsShape = 0.25,
                                      minDeath=100,
                                      minCase=5000
                                      )
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0157              0          0.147          0.0559
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Recency is defined as 2020-09-14 through current
## 
## Recency is defined as 2020-09-14 through current
## Warning: Removed 1 rows containing missing values (geom_point).

## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"

At a glance, the rule-based approach creates reasonable county-level segments that are generally well differentiated as far as burden and shape. Next steps are to continue to explore the county-level segmenting approach.

The rules-based approach pulls apart differences in both shape and magnitude, focused mainly on coronavirus deaths. The cases data are less reliable given the significant variation in testing by locale and timing.

Suppose that three archetype deaths per million by month are examined:

# Three simulated archetype curves (a, b, c) of deaths per million for March through September
archeDeaths <- tibble::tibble(mon=factor(month.abb[rep(3:9, times=3)], levels=month.abb), 
                              lvl=rep(c("a", "b", "c"), each=7), 
                              dpm=unlist(list(a=c(0, 250, 750, 500, 250, 100, 50), 
                                              b=c(0, 25, 200, 200, 125, 50, 50), 
                                              c=c(0, 0, 50, 50, 50, 150, 200)
                                              ), 
                                         use.names=FALSE
                                         )
                              )

# One line per archetype, all on a shared y-axis
p1 <- ggplot(archeDeaths, aes(x=mon, y=dpm, group=lvl, color=lvl)) + 
    geom_line() + 
    scale_color_discrete(name="Archetype") + 
    labs(title="Simulated archetype deaths per million", x="", y="Deaths per million")
p1

# Same plot with one facet per archetype so that the shapes can be compared
p1 + labs(subtitle="Each facet is on its own scale") + facet_wrap(~lvl, scales="free_y")

The rules based segmentation will pull these apart. Archetype a is high and early, archetype b is medium and early, archetype c is medium and late. Any reasonable distance metric will likely separate a and c, but a further objective is to have b (very) roughly equidistant from both a and c. The distance from a is driven by total volume and the distance from c is driven by shape.

Suppose that standard euclidean distance is calculated with the burden by month completely unscaled:

# Euclidean distance between archetypes using raw (unscaled) deaths per million by month
# One row per archetype and one column per month; id_cols is named for compatibility with newer tidyr
archeDeaths %>%
    pivot_wider(id_cols=lvl, names_from=mon, values_from=dpm) %>%
    column_to_rownames("lvl") %>%
    dist(method="euclidean", upper=TRUE)
##          a        b        c
## a          679.1539 905.5385
## b 679.1539          289.3959
## c 905.5385 289.3959

As expected, a and c are the most distant from each other, but b is more than twice as close to c as to a, driven by similarity in total disease burden. This may tend to drive very different shapes into the same segment, forcing the segmentation to be “too similar” to just a ranking by total deaths.

Suppose instead that a log transformation is applied to the deaths by month:

# Euclidean distance between archetypes using log10(1 + deaths per million) by month
# pmax floors at 0 before the log; id_cols is named for compatibility with newer tidyr
archeDeaths %>%
    mutate(logdpm=log10(pmax(dpm, 0)+1)) %>%
    pivot_wider(id_cols=lvl, names_from=mon, values_from=logdpm) %>%
    column_to_rownames("lvl") %>%
    dist(method="euclidean", upper=TRUE)
##          a        b        c
## a          1.277613 2.995279
## b 1.277613          1.855526
## c 2.995279 1.855526

This approach is encouraging and can be applied to the rules-based segmentation data:

# Deaths per million by rules-based cluster and month (March through September only)
# mean(pop) collapses the daily rows to one population per county-month before summing across counties
ruleSegData <- cty_20201015_rule6$clusterStateData %>%
    filter(!is.na(date)) %>%
    mutate(mon=factor(month.abb[lubridate::month(date)], levels=month.abb)) %>%
    group_by(fipsCounty, cluster, mon) %>%
    summarize(deaths=sum(deaths), pop=mean(pop)) %>%
    group_by(cluster, mon) %>%
    summarize(deaths=sum(deaths), pop=sum(pop)) %>%
    ungroup() %>%
    mutate(dpm=1000000*deaths/pop) %>%
    filter(mon %in% month.abb[3:9])

# Plot the cluster curves on a shared y-axis
p2 <- ruleSegData %>%
    ggplot(aes(x=mon, y=dpm)) + 
    geom_line(aes(group=cluster, color=cluster)) + 
    labs(x="", y="Deaths per million", title="Deaths per million by cluster") + 
    scale_color_discrete("Cluster")
p2

p2 + facet_wrap(~cluster, scales="free_y") + labs(subtitle="Each facet is on its own scale")

# Pairwise euclidean distance between clusters on log10(1 + dpm) by month, reshaped to long format
# id_cols is named for compatibility with newer tidyr
segDist <- ruleSegData %>%
    mutate(logdpm=log10(1+pmax(0, dpm))) %>%
    pivot_wider(id_cols=cluster, names_from=mon, values_from="logdpm") %>%
    column_to_rownames("cluster") %>%
    dist(method="euclidean", upper=TRUE, diag=TRUE) %>% as.matrix() %>% as.data.frame() %>%
    rownames_to_column("clusterA") %>% pivot_longer(-clusterA, names_to="clusterB")

# Heatmap of the distances, excluding the diagonal and cluster 999
segDist %>%
    filter(clusterA != clusterB, clusterA != 999, clusterB != 999) %>%
    ggplot(aes(x=clusterA, y=clusterB)) + 
    geom_tile(aes(fill=value)) + 
    geom_text(aes(label=round(value, 1))) + 
    scale_fill_continuous(high="red", low="white", "Distance") + 
    labs(x="", 
         y="", 
         title="Euclidean distance between cluster averages", 
         subtitle="Calculated using log10 deaths per million by month"
         )

The distances seem sensible, with the methodology assigning high distance when either magnitude OR shape is different. Suppose the methodology is applied to all counties that are not “too small” (classified as 999):

# County-level log10(1 + deaths per million) by month (March through September), excluding cluster 999
testSegData <- cty_20201015_rule6$clusterStateData %>%
    filter(!is.na(date), cluster != 999) %>%
    mutate(mon=factor(month.abb[lubridate::month(date)], levels=month.abb)) %>%
    group_by(fipsCounty, cluster, mon) %>%
    summarize(deaths=sum(deaths), pop=mean(pop)) %>%
    ungroup() %>%
    mutate(dpm=1000000*deaths/pop, logdpm=log10(1+pmax(0, dpm))) %>%
    filter(mon %in% month.abb[3:9])

# Distance between counties (one row per county, one column per month)
# id_cols is named for compatibility with newer tidyr; rownames carry fipsCounty in to the dist object
testDistData <- testSegData %>%
    pivot_wider(id_cols=fipsCounty, names_from=mon, values_from=logdpm) %>%
    column_to_rownames("fipsCounty") %>%
    dist()

# Hierarchical clusters on the county-level distances
testHierClust <- hclust(testDistData, method="complete")
plot(testHierClust)

The dendrogram is encouraging, with an early split into three, each followed by a split in two. Suppose that 6 segments are created from this:

# Cut the dendrogram in to 6 segments (vector is named by the dist labels, which are fipsCounty)
testClustVec <- cutree(testHierClust, k=6)

# Attach the new segment to every county-month record
# Lookup is forced to be by name (as.character) so that a numeric fipsCounty cannot index by position
testSegOut <- testSegData %>%
    mutate(newCluster=unname(testClustVec[as.character(fipsCounty)]))

# Cross-tab of population and county counts for the new segments vs. the rules-based clusters
testSegOut %>%
    group_by(fipsCounty, cluster, newCluster) %>%
    summarize(deaths=sum(deaths), pop=mean(pop)) %>%
    group_by(newCluster, cluster) %>%
    summarize(n=n(), pop=sum(pop), deaths=sum(deaths)) %>%
    ggplot(aes(x=factor(newCluster), y=cluster)) + 
    geom_tile(aes(fill=pop/1000000)) + 
    geom_text(aes(label=paste0(round(pop/1000000, 1), " million\n(n=", n, ")"))) + 
    scale_fill_continuous("Pop (millions)", high="lightgreen", low="white") + 
    labs(x="New cluster", y="Rules-based cluster", title="Change in cluster by methodology")

# Deaths per million by month for each combination (axis labels describe the facet columns and rows)
testSegOut %>%
    group_by(mon, cluster, newCluster) %>%
    summarize(deaths=sum(deaths), pop=sum(pop), n=n()) %>%
    ungroup() %>%
    mutate(dpm=1000000*deaths/pop, newCluster=factor(newCluster)) %>%
    ggplot(aes(x=mon, y=dpm)) + 
    geom_line(aes(color=newCluster, group=newCluster)) + 
    geom_text(aes(x="Jun", y=750, label=paste0("n=", n))) +
    facet_grid(newCluster ~ cluster) +
    labs(x="Rules-based cluster", y="New cluster", title="Deaths per million by cluster") + 
    scale_color_discrete("New cluster")

Further exploration is needed to understand the driver behind splitting some observations that, at a glance, look very similar.

The issue with the log10 approach is that zero and small start to become better differentiated than small and large (e.g., 0 deaths per million becomes 0, 100 deaths per million becomes 2, and 1000 deaths per million becomes 3). So, the goal of reducing spurious differentiation among outliers on the high end (2000 is 3.3, 4000 is 3.6) has been achieved, but at the cost of losing the power to appropriately separate high, medium, and low.

An additional approach could take advantage of the timing of deaths in the national curve (using only the counties that meet the population eligibility criteria):

# Daily deaths per million by rules-based cluster
# (no month column is created here since summarize would drop it; month is derived where it is needed)
# NOTE: the result remains grouped by cluster since summarize drops only the last grouping variable
ruleNationData <- cty_20201015_rule6$clusterStateData %>%
    filter(!is.na(date)) %>%
    group_by(cluster, date) %>%
    summarize(deaths=sum(deaths), pop=sum(pop)) %>%
    mutate(dpm=1000000*deaths/pop)

# Daily curve with a loess smooth, one facet per cluster (cluster 999 excluded)
ruleNationData %>%
    filter(cluster!=999) %>%
    ggplot(aes(x=date, y=dpm)) + 
    geom_line() + 
    geom_smooth(color="red", span=0.25, method="loess") +
    labs(x="", 
         y="Deaths per million", 
         title="US coronavirus deaths per million by day", 
         subtitle="Caution that each facet has its own y scale"
         ) + 
    scale_x_date(date_breaks="1 month", date_labels="%b") + 
    facet_wrap(~cluster, scales="free_y")

# Monthly curve, one facet per cluster (population is the mean of the daily cluster populations)
ruleNationData %>%
    filter(cluster!=999) %>%
    mutate(mon=factor(month.abb[lubridate::month(date)], levels=month.abb)) %>%
    group_by(cluster, mon) %>%
    summarize(deaths=sum(deaths), pop=mean(pop)) %>%
    mutate(dpm=1000000*deaths/pop) %>%
    ggplot(aes(x=mon, y=dpm)) + 
    geom_line(aes(group=cluster, color=cluster)) + 
    labs(x="", 
         y="Deaths per million", 
         title="US coronavirus deaths per million by month", 
         subtitle="Caution that each facet has its own y scale and October is only a partial month"
         ) + 
    facet_wrap(~cluster, scales="free_y")

The rule-based segments are nicely differentiated visually, both as far as shape and maximum y-axis value.

There is no meaningful spike in coronavirus until April and October is an incomplete month. Perhaps distance metrics would better be calculated using:

  • April and earlier
  • May-June
  • July-August
  • September and later

This avoids some of the spurious zero points that emerge from treating each month in each county as a separate observation:

# Deaths per million by cluster for four month groups (cluster 999 excluded)
# Months 1-4, 5-6, 7-8 and 9-10 map to the four groups; anything else is flagged as "Map error"
ruleNationData %>%
    filter(cluster!=999) %>%
    mutate(monNum=lubridate::month(date), 
           monGroup=factor(case_when(monNum <= 4 ~ "thru Apr", 
                                     monNum <= 6 ~ "May-Jun", 
                                     monNum <= 8 ~ "Jul-Aug", 
                                     monNum <= 10 ~ "Sep and later",
                                     TRUE ~ "Map error"
                                     ), 
                           levels=c("thru Apr", "May-Jun", "Jul-Aug", "Sep and later", "Map error")
                           )
           ) %>%
    group_by(cluster, monGroup) %>%
    summarise(deaths=sum(deaths), pop=mean(pop)) %>%
    mutate(dpm=1e6*deaths/pop) %>%
    ggplot(aes(x=monGroup, y=dpm, group=cluster, color=cluster)) + 
    geom_line() + 
    scale_y_continuous(limits=c(0, NA)) +
    facet_wrap(~cluster, scales="free_y") + 
    labs(title="US coronavirus deaths per million by month", 
         subtitle="Caution that each facet has its own y scale and October is only a partial month", 
         x="", 
         y="Deaths per million"
         )

There are now three distinct shapes of the curve:

  • Peak in Jul-Aug (one at ~800 dpm, one at ~400 dpm, one at ~100 dpm)
  • Peak in May-Jun (one at ~400 dpm, one at ~100 dpm)
  • Peak in thru Apr (one at ~1000 dpm)

Arguably, the rules-based segmenting approach outperforms attempts to calculate a single distance metric between the counties. The risk is that there may be underlying patterns in the data that are not detected by a pre-determined set of rules.

Suppose that the CI for the individual counties is also plotted on the same chart:

# Deaths by county and month group, plus an appended "All" pseudo-county holding the cluster totals
# moddpm scales 'Sep and later' by 61/43 (see plot caption) so the partial period is comparable
allCountyRules <- cty_20201015_rule6$clusterStateData %>%
    filter(!is.na(date)) %>%
    mutate(monNum=lubridate::month(date), 
           monGroup=factor(case_when(monNum <= 4 ~ "thru Apr", 
                                     monNum <= 6 ~ "May-Jun", 
                                     monNum <= 8 ~ "Jul-Aug", 
                                     monNum <= 10 ~ "Sep and later",
                                     TRUE ~ "Map error"
                                     ), 
                           levels=c("thru Apr", "May-Jun", "Jul-Aug", "Sep and later", "Map error")
                           )
           ) %>%
    group_by(fipsCounty, countyName, state, cluster, monGroup) %>%
    summarise(deaths=sum(deaths), pop=mean(pop)) %>%
    ungroup() %>%
    rbind(mutate(., state="All", countyName="All", fipsCounty="All")) %>%
    group_by(fipsCounty, countyName, state, cluster, monGroup) %>%
    summarise(deaths=sum(deaths), pop=sum(pop)) %>%
    mutate(dpm=1e6*deaths/pop, 
           moddpm=ifelse(monGroup=="Sep and later", dpm*(61/43), dpm)
           ) %>%
    ungroup()

# Percentile bands (5/25/75/95) of the county-level values by cluster and month group
ciData <- allCountyRules %>%
    filter(fipsCounty != "All") %>%
    group_by(cluster, monGroup) %>%
    summarise(ci05=quantile(moddpm, probs=0.05), 
              ci25=quantile(moddpm, probs=0.25), 
              ci75=quantile(moddpm, probs=0.75), 
              ci95=quantile(moddpm, probs=0.95)
              ) %>%
    ungroup()

# Cluster total as a line, with the county-level bands drawn as ribbons over it
ggplot(allCountyRules, aes(x=monGroup, group=cluster)) + 
    geom_line(data=function(d) filter(d, fipsCounty=="All"), aes(y=moddpm, color=cluster), lwd=2) + 
    geom_ribbon(data=ciData, aes(ymin=ci25, ymax=ci75), alpha=0.5) +
    geom_ribbon(data=ciData, aes(ymin=ci05, ymax=ci95), fill="grey", alpha=0.5) +
    scale_y_continuous(limits=c(0, NA)) +
    facet_wrap(~cluster) + 
    labs(title="US coronavirus deaths per million tracked by county", 
         subtitle="All facets on same y scale\nDark ribbon covers 50% of counties, light ribbon covers 90%", 
         caption="* Data through Oct 13 ('Sep and later' scaled by 61/43 to account for shorter time period)", 
         x="", 
         y="Deaths per million (thru Oct 13*)"
         )

Tentatively, the analysis will continue with the rules-based segmentation approach to county clusters since it seems to be driving meaningful differentiation for further analysis.

Suppose that evolution in cases is assessed using the county-level clusters:

# Deaths and cases by county and month group, plus an appended "All" pseudo-county with cluster totals
# moddpm/modcpm scale 'Sep and later' by 61/43 (see plot caption) so the partial period is comparable
allCountyRules <- cty_20201015_rule6$clusterStateData %>%
    filter(!is.na(date)) %>%
    mutate(monNum=lubridate::month(date), 
           monGroup=factor(case_when(monNum <= 4 ~ "thru Apr", 
                                     monNum <= 6 ~ "May-Jun", 
                                     monNum <= 8 ~ "Jul-Aug", 
                                     monNum <= 10 ~ "Sep and later",
                                     TRUE ~ "Map error"
                                     ), 
                           levels=c("thru Apr", "May-Jun", "Jul-Aug", "Sep and later", "Map error")
                           )
           ) %>%
    group_by(fipsCounty, countyName, state, cluster, monGroup) %>%
    summarise(deaths=sum(deaths), cases=sum(cases), pop=mean(pop)) %>%
    ungroup() %>%
    rbind(mutate(., state="All", countyName="All", fipsCounty="All")) %>%
    group_by(fipsCounty, countyName, state, cluster, monGroup) %>%
    summarise(deaths=sum(deaths), cases=sum(cases), pop=sum(pop)) %>%
    mutate(dpm=1e6*deaths/pop, 
           moddpm=ifelse(monGroup=="Sep and later", dpm*(61/43), dpm), 
           cpm=1e6*cases/pop, 
           modcpm=ifelse(monGroup=="Sep and later", cpm*(61/43), cpm)
           ) %>%
    ungroup()

# Percentile bands (5/25/75/95) of the county-level cases per million by cluster and month group
ciDataCases <- allCountyRules %>%
    filter(fipsCounty != "All") %>%
    group_by(cluster, monGroup) %>%
    summarise(ci05=quantile(modcpm, probs=0.05), 
              ci25=quantile(modcpm, probs=0.25), 
              ci75=quantile(modcpm, probs=0.75), 
              ci95=quantile(modcpm, probs=0.95)
              ) %>%
    ungroup()

# Cluster total as a line, with the county-level bands drawn as ribbons over it
ggplot(allCountyRules, aes(x=monGroup, group=cluster)) + 
    geom_line(data=function(d) filter(d, fipsCounty=="All"), aes(y=modcpm, color=cluster), lwd=2) + 
    geom_ribbon(data=ciDataCases, aes(ymin=ci25, ymax=ci75), alpha=0.5) +
    geom_ribbon(data=ciDataCases, aes(ymin=ci05, ymax=ci95), fill="grey", alpha=0.5) +
    scale_y_continuous(limits=c(0, NA)) +
    facet_wrap(~cluster) + 
    labs(title="US coronavirus cases per million tracked by county", 
         subtitle="All facets on same y scale\nDark ribbon covers 50% of counties, light ribbon covers 90%", 
         caption="* Data through Oct 13 ('Sep and later' scaled by 61/43 to account for shorter time period)", 
         x="", 
         y="Cases per million (thru Oct 13*)"
         )

The shapes are broadly consistent, with the exception that the segment with early/high deaths is fairly low on total case counts. Suppose that county-level segments are instead created using a preponderance of cases rather than deaths:

# Create 3x2 clusters for hml and shape, this time weighted towards cases
# ratioDeathvsCase=0.2 weights cases above deaths and minShape=4 starts the shape at time bucket 4
# dlCaseDeath is TRUE unless both files already exist (scalar test, so && rather than &)
popLoc <- "./RInputFiles/Coronavirus/covid_county_population_usafacts.csv"
cty_20201015_rule6_cases <- readRunUSAFacts(maxDate="2020-10-13", 
                                            popLoc=popLoc, 
                                            caseLoc=caseLoc, 
                                            deathLoc=deathLoc, 
                                            dlCaseDeath=!(file.exists(caseLoc) && file.exists(deathLoc)),
                                            oldFile=readFromRDS("burden_20200903_new"), 
                                            existingStateClusters=test_hier6_201014$useClusters,
                                            createClusters=TRUE, 
                                            hierarchical=NA,
                                            minShape=4,
                                            ratioDeathvsCase = 0.2,
                                            ratioTotalvsShape = 0.25,
                                            minDeath=100,
                                            minCase=5000
                                            )
## Parsed with column specification:
## cols(
##   countyFIPS = col_double(),
##   `County Name` = col_character(),
##   State = col_character(),
##   population = col_double()
## )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumCases   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `County Name` = col_character(),
##   State = col_character()
## )
## See spec(...) for full column specifications.
## Observations: 849,870
## Variables: 6
## $ countyFIPS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ countyName <chr> "Statewide Unallocated", "Statewide Unallocated", "State...
## $ state      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "A...
## $ stateFIPS  <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ date       <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01...
## $ cumDeaths  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...

## # A tibble: 1 x 6
##   cpm_mean_is0 dpm_mean_is0 cpm_mean_lt100 dpm_mean_lt100 cpm_mean_lt5000
##          <dbl>        <dbl>          <dbl>          <dbl>           <dbl>
## 1            0       0.0157              0          0.147          0.0559
## # ... with 1 more variable: dpm_mean_lt5000 <dbl>

## 
## Recency is defined as 2020-09-14 through current
## 
## Recency is defined as 2020-09-14 through current
## Warning: Removed 1 rows containing missing values (geom_point).

## Joining, by = "fipsCounty"
## Joining, by = "fipsCounty"

Interestingly, there is no longer a segment with high deaths early since it is not well-differentiated by having (relatively) high cases early. This is suggestive that deaths are a better metric than cases for tracking the historical impact of coronavirus by county (though given lags, cases are more likely to be a current leading indicator of burden).

Suppose that the movements by segment are examined:

# One row per county and cluster from the cases-driven run, with the cluster renamed to caseCluster
caseCluster <- cty_20201015_rule6_cases$clusterStateData %>%
    count(fipsCounty, cluster) %>%
    select(-n) %>%
    rename(caseCluster=cluster)

# Deaths-driven data with the cases-driven cluster attached (full join keeps counties from either run)
cty_20201015_case_death <- full_join(rename(cty_20201015_rule6$clusterStateData, deathCluster=cluster), 
                                     caseCluster, 
                                     by="fipsCounty"
                                     )

# Cross-tab of the two segmentations: county count, population, cases, deaths, and deaths/cases (CFR)
cty_20201015_case_death %>%
    group_by(fipsCounty, deathCluster, caseCluster) %>%
    summarise(deaths=sum(deaths, na.rm=TRUE), cases=sum(cases, na.rm=TRUE), pop=mean(pop)) %>%
    group_by(deathCluster, caseCluster) %>%
    summarise(n=n(), pop=sum(pop), cases=sum(cases), deaths=sum(deaths)) %>%
    mutate(tileLabel=paste0("n=", n, 
                            " (Pop: ", round(pop/1000000, 1), 
                            ")\nCases: ", round(cases/1000, 1), 
                            "\nDeaths: ", round(deaths/1000, 1), " (", round(100*deaths/cases, 1), "%)"
                            )
           ) %>%
    ggplot(aes(x=caseCluster, y=deathCluster)) + 
    geom_tile(aes(fill=deaths/cases)) + 
    geom_text(aes(label=tileLabel), size=3.5) + 
    scale_fill_continuous(name="CFR", low="white", high="orange") + 
    labs(title="County cluster comparison when driven by cases vs. deaths", 
         subtitle="n: # counties; Pop: population (millions); Cases: cases (000); Deaths: deaths (000) (CFR)", 
         x="County cluster driven by cases", 
         y="County cluster driven by deaths"
         )

# Daily cases and deaths per million for each (deathCluster, caseCluster) pair, with 7-day rolling means
# The rolling mean runs over the rows in their existing order within each pair
# NOTE(review): this presumes rows are in date order within each pair - confirm
# The result is left grouped by deathCluster and caseCluster
plotData <- cty_20201015_case_death %>%
    group_by(date, deathCluster, caseCluster) %>%
    summarise(deaths=sum(deaths, na.rm=TRUE), cases=sum(cases, na.rm=TRUE), pop=sum(pop)) %>%
    ungroup() %>%
    mutate(cpm=1e6*cases/pop, dpm=1e6*deaths/pop) %>%
    group_by(deathCluster, caseCluster) %>%
    mutate(cpm7=zoo::rollmean(cpm, k=7, fill=NA)) %>%
    mutate(dpm7=zoo::rollmean(dpm, k=7, fill=NA))

# Rolling deaths per million, death clusters in facet rows and case clusters in facet columns
# (the axis labels describe the facet columns and rows)
ggplot(plotData, aes(x=date, y=dpm7, group=deathCluster, color=deathCluster)) + 
    geom_line() + 
    facet_grid(rows=vars(deathCluster), cols=vars(caseCluster), switch="y") +
    labs(title="Deaths per million (7-day rolling mean) by county case cluster vs. county death cluster", 
         x="County case cluster", 
         y="County death cluster"
         )
## Warning: Removed 42 rows containing missing values (geom_path).

# Rolling cases per million, death clusters in facet rows and case clusters in facet columns
# (the axis labels describe the facet columns and rows)
ggplot(plotData, aes(x=date, y=cpm7, group=caseCluster, color=caseCluster)) + 
    geom_line() + 
    facet_grid(rows=vars(deathCluster), cols=vars(caseCluster), switch="y") +
    labs(title="Cases per million (7-day rolling mean) by county case cluster vs. county death cluster", 
         x="County case cluster", 
         y="County death cluster"
         )
## Warning: Removed 42 rows containing missing values (geom_path).

Significant variation in case fatality rate (deaths over confirmed cases) by both geography and time drives significant changes in the segments. Further, the “high cases” segment consists of multiple shapes, including a group that is early and a group that is late.

Next steps are to explore why the death clusters seem to be better differentiated on shape than the case clusters even when plotting using cases. Perhaps something is not working as intended when calculating distances based on shape of the curve.